-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Precommit testcase for pr81872 #84782
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Testcase shows a miscompile when the disjoint flag is dropped from an `or disjoint` instruction during vectorization.
@llvm/pr-subscribers-llvm-transforms Author: None (annamthomas) Changes: Testcase shows miscompile when dropping disjoint flag from disjoint or during vectorization. Full diff: https://github.com/llvm/llvm-project/pull/84782.diff 1 Files Affected:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
new file mode 100644
index 00000000000000..c6b1944b200908
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@global = external global ptr addrspace(1), align 8
+
+; PR 81872 explains the issue.
+
+; If we vectorize, we get a miscompile: the array index derived from the IV, and
+; thereby the element stored to (arr[99], arr[98]), is calculated incorrectly because
+; the disjoint or was only disjoint due to dominating conditions. Dropping the disjoint
+; flag to avoid poison still changes the behaviour, since the or is then no longer equivalent to an add.
+; Function Attrs: uwtable
+define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr noundef align 8 dereferenceable_or_null(16) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: bb5:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 99, i64 98, i64 97, i64 96>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 99, [[INDEX]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], <i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3
+; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 -4, i64 -4, i64 -4, i64 -4>
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[BB6:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 87, [[MIDDLE_BLOCK]] ], [ 99, [[BB5:%.*]] ]
+; CHECK-NEXT: br label [[BB15:%.*]]
+; CHECK: bb15:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[BB20:%.*]] ]
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1
+; CHECK-NEXT: [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0
+; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[BB20]], !prof [[PROF5:![0-9]+]]
+; CHECK: bb18:
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
+; CHECK-NEXT: [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr [[ARR]], i64 [[OR]]
+; CHECK-NEXT: store i64 1, ptr [[GETELEMENTPTR19]], align 8
+; CHECK-NEXT: br label [[BB20]]
+; CHECK: bb20:
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; CHECK-NEXT: [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90
+; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[BB15]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: bb6:
+; CHECK-NEXT: ret void
+;
+bb5:
+ br label %bb15
+
+bb15: ; preds = %bb20, %bb5
+ %iv = phi i64 [ 99, %bb5 ], [ %iv.next, %bb20 ]
+ %and = and i64 %iv, 1 ; isolate bit 0 of %iv
+ %icmp17 = icmp eq i64 %and, 0
+ br i1 %icmp17, label %bb18, label %bb20, !prof !21
+
+bb18: ; preds = %bb15
+ %or = or disjoint i64 %iv, 1 ; bb18 is only reached when bit 0 of %iv is clear, which is what makes the operands disjoint
+ %getelementptr19 = getelementptr inbounds i64, ptr %arr, i64 %or
+ store i64 1, ptr %getelementptr19, align 8
+ br label %bb20
+
+bb20: ; preds = %bb18, %bb15
+ %iv.next = add nsw i64 %iv, -1 ; IV counts down from 99
+ %icmp22 = icmp eq i64 %iv.next, 90 ; leave the loop once the decremented IV equals 90
+ br i1 %icmp22, label %bb6, label %bb15, !prof !22
+
+bb6:
+ ret void
+}
+
+attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" }
+
+!4 = !{}
+!10 = !{i32 1}
+!16 = !{i64 864}
+!17 = !{i64 8}
+!21 = !{!"branch_weights", i32 1, i32 1}
+!22 = !{!"branch_weights", i32 1, i32 95}
+
+
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 23}
+; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]}
+; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK: [[PROF6]] = !{!"branch_weights", i32 0, i32 0}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]}
+;.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks! (Some minor editorial suggestions inline)
It would also be good to prefix the title with |
Oh, I missed these comments and landed the change. Will address them post-commit. |
Testcase shows a miscompile when the disjoint flag is dropped from an
`or disjoint` instruction during vectorization.