LoopVectorize: add negative test for lrint, llrint #70211
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes: With the recent change 98c90a1 (ISel: introduce vector ISD::LRINT, ISD::LLRINT; custom RISCV lowering), it is now possible to vectorize llvm.lrint and llvm.llrint with a trivial change to VectorUtils. In preparation for that change, and the corresponding test update, add a negative test for lrint and llrint (a rough sketch of such a test is given after the diff excerpt below).

-- 8< --

Patch is 248.63 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/70211.diff

1 file affected:
diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index b2ba7cfbfa3a4d0..8da116067537a91 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -1,11 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-;CHECK-LABEL: @sqrt_f32(
-;CHECK: llvm.sqrt.v4f32
-;CHECK: ret void
-define void @sqrt_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+define void @sqrt_f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: define void @sqrt_f32(
+; CHECK-SAME: i32 [[N:%.*]], ptr [[Y:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[Y2:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT: [[X1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[X1]], [[Y2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967292
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP4]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CALL:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP6]])
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store float [[CALL]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
@@ -14,7 +60,7 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
- %call = tail call float @llvm.sqrt.f32(float %0) nounwind readnone
+ %call = tail call float @llvm.sqrt.f32(float %0)
%arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
store float %call, ptr %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -26,12 +72,59 @@ for.end: ; preds = %for.body, %entry
ret void
}
-declare float @llvm.sqrt.f32(float) nounwind readnone
-
-;CHECK-LABEL: @sqrt_f64(
-;CHECK: llvm.sqrt.v4f64
-;CHECK: ret void
-define void @sqrt_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+declare float @llvm.sqrt.f32(float)
+
+define void @sqrt_f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: define void @sqrt_f64(
+; CHECK-SAME: i32 [[N:%.*]], ptr [[Y:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[Y2:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT: [[X1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[X1]], [[Y2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 32
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967292
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.sqrt.f64(double [[TMP6]])
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX2]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
@@ -40,7 +133,7 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
%0 = load double, ptr %arrayidx, align 8
- %call = tail call double @llvm.sqrt.f64(double %0) nounwind readnone
+ %call = tail call double @llvm.sqrt.f64(double %0)
%arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
store double %call, ptr %arrayidx2, align 8
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -52,12 +145,59 @@ for.end: ; preds = %for.body, %entry
ret void
}
-declare double @llvm.sqrt.f64(double) nounwind readnone
-
-;CHECK-LABEL: @sin_f32(
-;CHECK: llvm.sin.v4f32
-;CHECK: ret void
-define void @sin_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+declare double @llvm.sqrt.f64(double)
+
+define void @sin_f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: define void @sin_f32(
+; CHECK-SAME: i32 [[N:%.*]], ptr [[Y:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[Y2:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT: [[X1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[X1]], [[Y2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967292
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.sin.v4f32(<4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP4]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CALL:%.*]] = tail call float @llvm.sin.f32(float [[TMP6]])
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store float [[CALL]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
@@ -66,7 +206,7 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
- %call = tail call float @llvm.sin.f32(float %0) nounwind readnone
+ %call = tail call float @llvm.sin.f32(float %0)
%arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
store float %call, ptr %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -78,12 +218,59 @@ for.end: ; preds = %for.body, %entry
ret void
}
-declare float @llvm.sin.f32(float) nounwind readnone
-
-;CHECK-LABEL: @sin_f64(
-;CHECK: llvm.sin.v4f64
-;CHECK: ret void
-define void @sin_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+declare float @llvm.sin.f32(float)
+
+define void @sin_f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: define void @sin_f64(
+; CHECK-SAME: i32 [[N:%.*]], ptr [[Y:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[Y2:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT: [[X1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[X1]], [[Y2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 32
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967292
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.sin.v4f64(<4 x double> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[TMP6]])
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX2]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
@@ -92,7 +279,7 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
%0 = load double, ptr %arrayidx, align 8
- %call = tail call double @llvm.sin.f64(double %0) nounwind readnone
+ %call = tail call double @llvm.sin.f64(double %0)
%arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
store double %call, ptr %arrayidx2, align 8
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -104,12 +291,59 @@ for.end: ; preds = %for.body, %entry
ret void
}
-declare double @llvm.sin.f64(double) nounwind readnone
-
-;CHECK-LABEL: @cos_f32(
-;CHECK: llvm.cos.v4f32
-;CHECK: ret void
-define void @cos_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+declare double @llvm.sin.f64(double)
+
+define void @cos_f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: define void @cos_f32(
+; CHECK-SAME: i32 [[N:%.*]], ptr [[Y:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[Y2:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT: [[X1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[X1]], [[Y2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967292
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.cos.v4f32(<4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP4]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds fl...
[truncated]
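The new lrint and llrint functions fall in the truncated part of the diff above. As a rough, hypothetical sketch only (the function name, types, and check lines below are assumptions, not the verbatim patch content), a negative test for lrint in this file would look something like the following: the loop calls the scalar intrinsic, and the checks assert that no vector form such as llvm.lrint.v4i64 is produced.

; CHECK-LABEL: @lrint_f32(
; CHECK-NOT: llvm.lrint.v4i64
; CHECK: ret void
define void @lrint_f32(i32 %n, ptr %y, ptr %x) {
entry:
  %cmp = icmp sgt i32 %n, 0
  br i1 %cmp, label %for.body, label %for.end

for.body:
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds float, ptr %y, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  ; Scalar lrint call; the vectorizer should leave this scalar until
  ; VectorUtils learns that lrint is trivially vectorizable.
  %call = tail call i64 @llvm.lrint.i64.f32(float %0)
  %arrayidx2 = getelementptr inbounds i64, ptr %x, i64 %iv
  store i64 %call, ptr %arrayidx2, align 8
  %iv.next = add i64 %iv, 1
  %lftr.wideiv = trunc i64 %iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

declare i64 @llvm.lrint.i64.f32(float)

An analogous function calling @llvm.llrint.i64.f64, with a CHECK-NOT on llvm.llrint.v4i64, would cover llrint; the file's existing RUN line applies to both.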
(Force-pushed d7af63e to 818a47b.)
(Force-pushed 818a47b to bfa5d78.)
As #70202 has landed, this patch is ready for independent review.
LGTM