Skip to content

[Vectorizer] precommit test for miscompilation #120731

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

Conversation

fmayer
Copy link
Contributor

@fmayer fmayer commented Dec 20, 2024

we generate GEPs that are out of bounds but mark them as "inbound"

Created using spr 1.3.4
@llvmbot
Copy link
Member

llvmbot commented Dec 20, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Florian Mayer (fmayer)

Changes

we generate GEPs that are out of bounds but mark them as "inbound"


Full diff: https://github.com/llvm/llvm-project/pull/120731.diff

1 Files Affected:

  • (added) llvm/test/Transforms/LoopVectorize/vplan-noinbounds-gep.ll (+107)
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-noinbounds-gep.ll b/llvm/test/Transforms/LoopVectorize/vplan-noinbounds-gep.ll
new file mode 100644
index 00000000000000..99605971298fe9
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vplan-noinbounds-gep.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -enable-vplan-native-path -S %s | FileCheck %s
+
+source_filename = "<stdin>"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nofree norecurse nosync nounwind memory(none)
+define i1 @fn() local_unnamed_addr #0 {
+; CHECK-LABEL: define i1 @fn(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[NNO:%.*]] = alloca [12 x i32], align 16
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 10, i64 9, i64 8, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 10, [[INDEX]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], splat (i64 10)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [12 x i32], ptr [[NNO]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -3
+; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[REVERSE]], <4 x i32> poison)
+; CHECK-NEXT:    [[REVERSE1:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[REVERSE1]], splat (i32 1)
+; CHECK-NEXT:    [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 10)
+; CHECK-NEXT:    [[TMP9:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
+; CHECK-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[REVERSE1]], <4 x i32> [[TMP8]]
+; CHECK-NEXT:    [[TMP11]] = or <4 x i32> [[PREDPHI]], [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP11]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
+; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP12]])
+; CHECK-NEXT:    br i1 true, label [[FOR_END36:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ -2, [[MIDDLE_BLOCK]] ], [ 10, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[FOR_BODY20:%.*]]
+; CHECK:       for.body20:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC35:%.*]] ]
+; CHECK-NEXT:    [[SUM_01:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_1:%.*]], [[FOR_INC35]] ]
+; CHECK-NEXT:    [[REM4:%.*]] = and i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[CMP21:%.*]] = icmp eq i64 [[REM4]], 0
+; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw [12 x i32], ptr [[NNO]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX24]], align 4
+; CHECK-NEXT:    br i1 [[CMP21]], label [[IF_THEN22:%.*]], label [[FOR_INC35]]
+; CHECK:       if.then22:
+; CHECK-NEXT:    [[MUL:%.*]] = shl i32 [[TMP15]], 1
+; CHECK-NEXT:    [[REM27:%.*]] = urem i32 [[MUL]], 10
+; CHECK-NEXT:    br label [[FOR_INC35]]
+; CHECK:       for.inc35:
+; CHECK-NEXT:    [[REM27_PN:%.*]] = phi i32 [ [[REM27]], [[IF_THEN22]] ], [ [[TMP15]], [[FOR_BODY20]] ]
+; CHECK-NEXT:    [[SUM_1]] = or i32 [[REM27_PN]], [[SUM_01]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT:    [[CMP19_NOT:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
+; CHECK-NEXT:    br i1 [[CMP19_NOT]], label [[FOR_END36]], label [[FOR_BODY20]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       for.end36:
+; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_INC35]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[CMP41:%.*]] = icmp eq i32 [[SUM_1_LCSSA]], 0
+; CHECK-NEXT:    ret i1 [[CMP41]]
+;
+entry:
+  %nno = alloca [12 x i32], align 16
+  br label %for.body20
+
+for.body20:                                       ; preds = %entry, %for.inc35
+  %indvars.iv = phi i64 [ 10, %entry ], [ %indvars.iv.next, %for.inc35 ]
+  %sum.01 = phi i32 [ 0, %entry ], [ %sum.1, %for.inc35 ]
+  %rem4 = and i64 %indvars.iv, 1
+  %cmp21 = icmp eq i64 %rem4, 0
+  %arrayidx24 = getelementptr inbounds nuw [12 x i32], ptr %nno, i64 0, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx24, align 4
+  br i1 %cmp21, label %if.then22, label %for.inc35
+
+if.then22:                                        ; preds = %for.body20
+  %mul = shl i32 %0, 1
+  %rem27 = urem i32 %mul, 10
+  br label %for.inc35
+
+for.inc35:                                        ; preds = %for.body20, %if.then22
+  %rem27.pn = phi i32 [ %rem27, %if.then22 ], [ %0, %for.body20 ]
+  %sum.1 = or i32 %rem27.pn, %sum.01
+  %indvars.iv.next = add nsw i64 %indvars.iv, -1
+  %cmp19.not = icmp eq i64 %indvars.iv, 0
+  br i1 %cmp19.not, label %for.end36, label %for.body20
+
+for.end36:                                        ; preds = %for.inc35
+  %sum.1.lcssa = phi i32 [ %sum.1, %for.inc35 ]
+  %cmp41 = icmp eq i32 %sum.1.lcssa, 0
+  ret i1 %cmp41
+}
+
+attributes #0 = { nofree norecurse nosync nounwind memory(none) "target-features"="+aes,+avx,+cmov,+crc32,+cx16,+cx8,+fxsr,+mmx,+pclmul,+popcnt,+prfchw,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }

@fmayer fmayer requested a review from fhahn December 20, 2024 13:10

source_filename = "<stdin>"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this X86 specific? If so, it needs to be moved to the X86 subdirectory

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -enable-vplan-native-path -S %s | FileCheck %s

source_filename = "<stdin>"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

drop

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

; CHECK-NEXT: ret i1 [[CMP41]]
;
entry:
%nno = alloca [12 x i32], align 16
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be passed as argument? Otherwise it needs to be initialized (e.g. passing to a @init() function, to avoid reading uninitialized memory

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

@@ -0,0 +1,107 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -enable-vplan-native-path -S %s | FileCheck %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -enable-vplan-native-path -S %s | FileCheck %s
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s

no nested loops, so no the 'native' path isn't used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

%nno = alloca [12 x i32], align 16
br label %for.body20

for.body20: ; preds = %entry, %for.inc35
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to clean up names, e.g.

Suggested change
for.body20: ; preds = %entry, %for.inc35
loop.header:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done (this was from llvm-reduce :))

%rem27 = urem i32 %mul, 10
br label %for.inc35

for.inc35: ; preds = %for.body20, %if.then22
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for.inc35: ; preds = %for.body20, %if.then22
loop.latch:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

%sum.01 = phi i32 [ 0, %entry ], [ %sum.1, %for.inc35 ]
%rem4 = and i64 %indvars.iv, 1
%cmp21 = icmp eq i64 %rem4, 0
%arrayidx24 = getelementptr inbounds nuw [12 x i32], ptr %nno, i64 0, i64 %indvars.iv
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%arrayidx24 = getelementptr inbounds nuw [12 x i32], ptr %nno, i64 0, i64 %indvars.iv
%gep = getelementptr inbounds nuw i32, ptr %nno, i64 %indvars.iv

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

br label %for.body20

for.body20: ; preds = %entry, %for.inc35
%indvars.iv = phi i64 [ 10, %entry ], [ %indvars.iv.next, %for.inc35 ]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%indvars.iv = phi i64 [ 10, %entry ], [ %indvars.iv.next, %for.inc35 ]
%iv = phi i64 [ 10, %entry ], [ %indvars.iv.next, %for.inc35 ]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Comment on lines 101 to 104
for.end36: ; preds = %for.inc35
%sum.1.lcssa = phi i32 [ %sum.1, %for.inc35 ]
%cmp41 = icmp eq i32 %sum.1.lcssa, 0
ret i1 %cmp41
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for.end36: ; preds = %for.inc35
%sum.1.lcssa = phi i32 [ %sum.1, %for.inc35 ]
%cmp41 = icmp eq i32 %sum.1.lcssa, 0
ret i1 %cmp41
exit:
%sum.1.lcssa = phi i32 [ %sum.1, %for.inc35 ]
ret i32 %sum.1.lcssa

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Created using spr 1.3.4
@fmayer fmayer requested a review from fhahn December 20, 2024 14:06
Created using spr 1.3.4
Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks!

Some more suggestions inline

As for the test name, vplan-... is usually only used when the test checks the printed VPlans. maybe drop-inbounds-flags-for-reverse-vector-pointer.ll

%sum.01 = phi i32 [ 0, %entry ], [ %sum.1, %loop.latch ]
%rem4 = and i64 %iv, 1
%cmp21 = icmp eq i64 %rem4, 0
%gep = getelementptr inbounds nuw [12 x i32], ptr %nno, i64 0, i64 %iv
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%gep = getelementptr inbounds nuw [12 x i32], ptr %nno, i64 0, i64 %iv
%gep = getelementptr inbounds nuw i32, ptr %nno, i64 %iv

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

ret i1 %cmp41
}

attributes #0 = { nofree norecurse nosync nounwind memory(none) "target-features"="+aes,+avx,+cmov,+crc32,+cx16,+cx8,+fxsr,+mmx,+pclmul,+popcnt,+prfchw,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please clean those up and retain only th eneeded flags (+avx?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: nofree norecurse nosync nounwind memory(none)
define i1 @fn(ptr %nno) local_unnamed_addr #0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
define i1 @fn(ptr %nno) local_unnamed_addr #0 {
define i1 @fn(ptr %nno) #0 {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: nofree norecurse nosync nounwind memory(none)
define i1 @fn(ptr %nno) local_unnamed_addr #0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to add a comment sayin what this tests, e.g. something like FIXME: GEP flags on GEPs for reverse vector pointer need to be dropped when folding the tail.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Created using spr 1.3.4
@fmayer
Copy link
Contributor Author

fmayer commented Jan 3, 2025

thanks for the review

Created using spr 1.3.4
@fmayer fmayer merged commit 62b5cf0 into main Jan 3, 2025
8 checks passed
@fmayer fmayer deleted the users/fmayer/spr/vectorizer-precommit-test-for-miscompilation branch January 3, 2025 14:37
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants