LoopVectorize/test: add missing CHECK lines, cleanup intrinsic.ll #70202
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes: The test intrinsic.ll was written before UpdateTestChecks was introduced. Clean it up by removing extraneous attributes and target datalayout, before regenerating it using UpdateTestChecks.

Patch is 244.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70202.diff

1 Files Affected:
diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index b2ba7cfbfa3a4d0..27ad508bc456176 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -1,11 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-;CHECK-LABEL: @sqrt_f32(
-;CHECK: llvm.sqrt.v4f32
-;CHECK: ret void
-define void @sqrt_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+define void @sqrt_f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: define void @sqrt_f32(
+; CHECK-SAME: i32 [[N:%.*]], ptr [[Y:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[Y2:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT: [[X1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[X1]], [[Y2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967292
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP4]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CALL:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP6]])
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store float [[CALL]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
@@ -14,7 +60,7 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
- %call = tail call float @llvm.sqrt.f32(float %0) nounwind readnone
+ %call = tail call float @llvm.sqrt.f32(float %0)
%arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
store float %call, ptr %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -26,12 +72,59 @@ for.end: ; preds = %for.body, %entry
ret void
}
-declare float @llvm.sqrt.f32(float) nounwind readnone
-
-;CHECK-LABEL: @sqrt_f64(
-;CHECK: llvm.sqrt.v4f64
-;CHECK: ret void
-define void @sqrt_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+declare float @llvm.sqrt.f32(float)
+
+define void @sqrt_f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: define void @sqrt_f64(
+; CHECK-SAME: i32 [[N:%.*]], ptr [[Y:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[Y2:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT: [[X1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[X1]], [[Y2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 32
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967292
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.sqrt.f64(double [[TMP6]])
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX2]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
@@ -40,7 +133,7 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
%0 = load double, ptr %arrayidx, align 8
- %call = tail call double @llvm.sqrt.f64(double %0) nounwind readnone
+ %call = tail call double @llvm.sqrt.f64(double %0)
%arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
store double %call, ptr %arrayidx2, align 8
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -52,12 +145,59 @@ for.end: ; preds = %for.body, %entry
ret void
}
-declare double @llvm.sqrt.f64(double) nounwind readnone
-
-;CHECK-LABEL: @sin_f32(
-;CHECK: llvm.sin.v4f32
-;CHECK: ret void
-define void @sin_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+declare double @llvm.sqrt.f64(double)
+
+define void @sin_f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: define void @sin_f32(
+; CHECK-SAME: i32 [[N:%.*]], ptr [[Y:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[Y2:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT: [[X1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[X1]], [[Y2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967292
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.sin.v4f32(<4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP4]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CALL:%.*]] = tail call float @llvm.sin.f32(float [[TMP6]])
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store float [[CALL]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
@@ -66,7 +206,7 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
- %call = tail call float @llvm.sin.f32(float %0) nounwind readnone
+ %call = tail call float @llvm.sin.f32(float %0)
%arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
store float %call, ptr %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -78,12 +218,59 @@ for.end: ; preds = %for.body, %entry
ret void
}
-declare float @llvm.sin.f32(float) nounwind readnone
-
-;CHECK-LABEL: @sin_f64(
-;CHECK: llvm.sin.v4f64
-;CHECK: ret void
-define void @sin_f64(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+declare float @llvm.sin.f32(float)
+
+define void @sin_f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: define void @sin_f64(
+; CHECK-SAME: i32 [[N:%.*]], ptr [[Y:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[Y2:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT: [[X1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[X1]], [[Y2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 32
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967292
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.sin.v4f64(<4 x double> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[TMP6]])
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX2]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
@@ -92,7 +279,7 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
%0 = load double, ptr %arrayidx, align 8
- %call = tail call double @llvm.sin.f64(double %0) nounwind readnone
+ %call = tail call double @llvm.sin.f64(double %0)
%arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
store double %call, ptr %arrayidx2, align 8
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -104,12 +291,59 @@ for.end: ; preds = %for.body, %entry
ret void
}
-declare double @llvm.sin.f64(double) nounwind readnone
-
-;CHECK-LABEL: @cos_f32(
-;CHECK: llvm.cos.v4f32
-;CHECK: ret void
-define void @cos_f32(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+declare double @llvm.sin.f64(double)
+
+define void @cos_f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: define void @cos_f32(
+; CHECK-SAME: i32 [[N:%.*]], ptr [[Y:%.*]], ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[Y2:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT: [[X1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[X1]], [[Y2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967292
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.cos.v4f32(<4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP4]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds fl...
[truncated]
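For anyone regenerating these checks locally, here is a sketch of the invocation. The checkout layout and the path to the built `opt` are assumptions; `--version 3` matches the UTC_ARGS recorded in the NOTE line at the top of the test.

```sh
# Sketch: run from an llvm-project checkout, assuming opt was built
# into build/bin; --version 3 mirrors the UTC_ARGS noted in the test.
python3 llvm/utils/update_test_checks.py --version 3 \
    --opt-binary=build/bin/opt \
    llvm/test/Transforms/LoopVectorize/intrinsic.ll
```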
Not sure if it is worth auto-generating the checks here; the file mainly tests that we can widen various intrinsics, and auto-generated checks will need updating any time something unrelated to intrinsic handling changes the output, adding churn. It also increases the size of the test quite a bit, so if we go with auto-generating it might make sense to split the file up a bit.
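For contrast, the targeted style the test previously used (visible in the removed lines of the diff above) pins only the widening behavior. A minimal sketch of that shape, tightened slightly to anchor the full call rather than a bare substring:

```llvm
; CHECK-LABEL: @sqrt_f32(
; CHECK: call <4 x float> @llvm.sqrt.v4f32(
; CHECK: ret void
```

Checks like these survive unrelated changes to block layout, runtime checks, and metadata numbering, at the cost of asserting far less about the output.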
(force-pushed 342d914 to 3b9e87b)
Makes sense, although I've had several bad experiences with hand-crafted CHECK lines. Please see updated patch.
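One way hand-crafted checks of the old shape can mislead (an illustration, not necessarily the failure encountered here): a bare substring match is not anchored to the call site, so for the last CHECK-LABEL in a file, whose scope runs to the end of the input, it can be satisfied by the intrinsic declaration that `opt -S` prints at module scope instead of a widened call in the loop:

```llvm
; CHECK-LABEL: @sqrt_f32(
; CHECK: llvm.sqrt.v4f32
;
; If this is the last CHECK-LABEL, the substring above also matches
; the trailing module-level line
;   declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
; so the check can pass even when no widened call was emitted.
```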
Thanks for the updates! A few more comments inline.
Hi. I'll investigate the test change in the morning, but I don't quite understand your "wrapping is strange" comments: can you clarify what you mean? I agree that it looks a bit strange on the GitHub UI, but it looks fine in my editor.
If it is fine in the editor, that's good; sounds like just a glitch in the GitHub UI.
Clean up intrinsic.ll by removing extraneous attributes and target datalayout, and add missing CHECK lines.
(force-pushed 3b9e87b to e2d089e)
Gentle ping.
LGTM
Clean up intrinsic.ll by removing extraneous attributes and target datalayout, fix a bug in the copysign_f64 test, and add missing CHECK lines.