-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LoopVectorize] Add test case for minloc reduction #141556
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b4ee218
b799f55
01dee72
77fe860
fbf4ac5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN-VW1-IL4 | ||
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN-VW4-IL1 | ||
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN-VW4-IL2 | ||
|
||
; This test case is extracted from rnflow (fortran) benchmark in polyhedron benchmark suite. | ||
; The function minlst primarily takes two indices (i.e. range), scans backwards in the range | ||
; and returns the firstIV of the minimum value. | ||
|
||
define i32 @minlst(i32 %first_index, i32 %last_index, ptr %array) { | ||
; CHECK-REV-MIN-VW1-IL4-LABEL: define i32 @minlst( | ||
; CHECK-REV-MIN-VW1-IL4-SAME: i32 [[FIRST_INDEX:%.*]], i32 [[LAST_INDEX:%.*]], ptr [[ARRAY:%.*]]) { | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[ENTRY:.*]]: | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[FIRST_INDEX_SEXT:%.*]] = sext i32 [[FIRST_INDEX]] to i64 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LAST_INDEX_NEG:%.*]] = sub i32 0, [[LAST_INDEX]] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LAST_INDEX_NEG_SEXT:%.*]] = sext i32 [[LAST_INDEX_NEG]] to i64 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[ADD:%.*]] = add nsw i64 [[FIRST_INDEX_SEXT]], [[LAST_INDEX_NEG_SEXT]] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[DIFF:%.*]] = sub nsw i64 0, [[ADD]] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[FIRST_PTR:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 -8 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[SECOND_PTR:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 -4 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[EARLY_EXIT_COND:%.*]] = icmp slt i64 [[ADD]], 0 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: br i1 [[EARLY_EXIT_COND]], label %[[LOOP_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]] | ||
; CHECK-REV-MIN-VW1-IL4: [[LOOP_PREHEADER]]: | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LAST_INDEX_SEXT:%.*]] = sext i32 [[LAST_INDEX]] to i64 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: br label %[[LOOP:.*]] | ||
; CHECK-REV-MIN-VW1-IL4: [[LOOP]]: | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[LAST_INDEX_SEXT]], %[[LOOP_PREHEADER]] ] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[DIFF]], %[[LOOP_PREHEADER]] ] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[INDEX:%.*]] = phi i32 [ [[SELECT:%.*]], %[[LOOP]] ], [ [[LAST_INDEX]], %[[LOOP_PREHEADER]] ] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LOAD1_PTR:%.*]] = getelementptr float, ptr [[FIRST_PTR]], i64 [[IV]] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LOAD1:%.*]] = load float, ptr [[LOAD1_PTR]], align 4 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[INDEX_SEXT:%.*]] = sext i32 [[INDEX]] to i64 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LOAD2_PTR:%.*]] = getelementptr float, ptr [[SECOND_PTR]], i64 [[INDEX_SEXT]] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LOAD2:%.*]] = load float, ptr [[LOAD2_PTR]], align 4 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[CMP:%.*]] = fcmp contract olt float [[LOAD1]], [[LOAD2]] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[IV_NEXT_TRUNC:%.*]] = trunc nsw i64 [[IV_NEXT]] to i32 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[SELECT]] = select i1 [[CMP]], i32 [[IV_NEXT_TRUNC]], i32 [[INDEX]] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[DEC]] = add nsw i64 [[DEC_IV]], -1 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LOOP_COND:%.*]] = icmp sgt i64 [[DEC_IV]], 1 | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]] | ||
; CHECK-REV-MIN-VW1-IL4: [[__CRIT_EDGE_LOOPEXIT:.*:]] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ [[SELECT]], %[[LOOP]] ] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: br [[DOT_CRIT_EDGE]] | ||
; CHECK-REV-MIN-VW1-IL4: [[__CRIT_EDGE:.*:]] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LAST_INDEX_RET:%.*]] = phi i32 [ [[LAST_INDEX]], %[[ENTRY]] ], [ [[SELECT_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ] | ||
; CHECK-REV-MIN-VW1-IL4-NEXT: ret i32 [[LAST_INDEX_RET]] | ||
; | ||
; CHECK-REV-MIN-VW4-IL1-LABEL: define i32 @minlst( | ||
; CHECK-REV-MIN-VW4-IL1-SAME: i32 [[FIRST_INDEX:%.*]], i32 [[LAST_INDEX:%.*]], ptr [[ARRAY:%.*]]) { | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[ENTRY:.*]]: | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[FIRST_INDEX_SEXT:%.*]] = sext i32 [[FIRST_INDEX]] to i64 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LAST_INDEX_NEG:%.*]] = sub i32 0, [[LAST_INDEX]] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LAST_INDEX_NEG_SEXT:%.*]] = sext i32 [[LAST_INDEX_NEG]] to i64 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[ADD:%.*]] = add nsw i64 [[FIRST_INDEX_SEXT]], [[LAST_INDEX_NEG_SEXT]] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[DIFF:%.*]] = sub nsw i64 0, [[ADD]] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[FIRST_PTR:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 -8 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[SECOND_PTR:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 -4 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[EARLY_EXIT_COND:%.*]] = icmp slt i64 [[ADD]], 0 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: br i1 [[EARLY_EXIT_COND]], label %[[LOOP_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]] | ||
; CHECK-REV-MIN-VW4-IL1: [[LOOP_PREHEADER]]: | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LAST_INDEX_SEXT:%.*]] = sext i32 [[LAST_INDEX]] to i64 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: br label %[[LOOP:.*]] | ||
; CHECK-REV-MIN-VW4-IL1: [[LOOP]]: | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[LAST_INDEX_SEXT]], %[[LOOP_PREHEADER]] ] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[DIFF]], %[[LOOP_PREHEADER]] ] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[INDEX:%.*]] = phi i32 [ [[SELECT:%.*]], %[[LOOP]] ], [ [[LAST_INDEX]], %[[LOOP_PREHEADER]] ] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LOAD1_PTR:%.*]] = getelementptr float, ptr [[FIRST_PTR]], i64 [[IV]] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LOAD1:%.*]] = load float, ptr [[LOAD1_PTR]], align 4 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[INDEX_SEXT:%.*]] = sext i32 [[INDEX]] to i64 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LOAD2_PTR:%.*]] = getelementptr float, ptr [[SECOND_PTR]], i64 [[INDEX_SEXT]] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LOAD2:%.*]] = load float, ptr [[LOAD2_PTR]], align 4 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[CMP:%.*]] = fcmp contract olt float [[LOAD1]], [[LOAD2]] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[IV_NEXT_TRUNC:%.*]] = trunc nsw i64 [[IV_NEXT]] to i32 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[SELECT]] = select i1 [[CMP]], i32 [[IV_NEXT_TRUNC]], i32 [[INDEX]] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[DEC]] = add nsw i64 [[DEC_IV]], -1 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LOOP_COND:%.*]] = icmp sgt i64 [[DEC_IV]], 1 | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]] | ||
; CHECK-REV-MIN-VW4-IL1: [[__CRIT_EDGE_LOOPEXIT:.*:]] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ [[SELECT]], %[[LOOP]] ] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: br [[DOT_CRIT_EDGE]] | ||
; CHECK-REV-MIN-VW4-IL1: [[__CRIT_EDGE:.*:]] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LAST_INDEX_RET:%.*]] = phi i32 [ [[LAST_INDEX]], %[[ENTRY]] ], [ [[SELECT_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ] | ||
; CHECK-REV-MIN-VW4-IL1-NEXT: ret i32 [[LAST_INDEX_RET]] | ||
; | ||
; CHECK-REV-MIN-VW4-IL2-LABEL: define i32 @minlst( | ||
; CHECK-REV-MIN-VW4-IL2-SAME: i32 [[FIRST_INDEX:%.*]], i32 [[LAST_INDEX:%.*]], ptr [[ARRAY:%.*]]) { | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[ENTRY:.*]]: | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[FIRST_INDEX_SEXT:%.*]] = sext i32 [[FIRST_INDEX]] to i64 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LAST_INDEX_NEG:%.*]] = sub i32 0, [[LAST_INDEX]] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LAST_INDEX_NEG_SEXT:%.*]] = sext i32 [[LAST_INDEX_NEG]] to i64 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[ADD:%.*]] = add nsw i64 [[FIRST_INDEX_SEXT]], [[LAST_INDEX_NEG_SEXT]] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[DIFF:%.*]] = sub nsw i64 0, [[ADD]] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[FIRST_PTR:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 -8 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[SECOND_PTR:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 -4 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[EARLY_EXIT_COND:%.*]] = icmp slt i64 [[ADD]], 0 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: br i1 [[EARLY_EXIT_COND]], label %[[LOOP_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]] | ||
; CHECK-REV-MIN-VW4-IL2: [[LOOP_PREHEADER]]: | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LAST_INDEX_SEXT:%.*]] = sext i32 [[LAST_INDEX]] to i64 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: br label %[[LOOP:.*]] | ||
; CHECK-REV-MIN-VW4-IL2: [[LOOP]]: | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[LAST_INDEX_SEXT]], %[[LOOP_PREHEADER]] ] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[DIFF]], %[[LOOP_PREHEADER]] ] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[INDEX:%.*]] = phi i32 [ [[SELECT:%.*]], %[[LOOP]] ], [ [[LAST_INDEX]], %[[LOOP_PREHEADER]] ] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LOAD1_PTR:%.*]] = getelementptr float, ptr [[FIRST_PTR]], i64 [[IV]] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LOAD1:%.*]] = load float, ptr [[LOAD1_PTR]], align 4 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[INDEX_SEXT:%.*]] = sext i32 [[INDEX]] to i64 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LOAD2_PTR:%.*]] = getelementptr float, ptr [[SECOND_PTR]], i64 [[INDEX_SEXT]] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LOAD2:%.*]] = load float, ptr [[LOAD2_PTR]], align 4 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[CMP:%.*]] = fcmp contract olt float [[LOAD1]], [[LOAD2]] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[IV_NEXT_TRUNC:%.*]] = trunc nsw i64 [[IV_NEXT]] to i32 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[SELECT]] = select i1 [[CMP]], i32 [[IV_NEXT_TRUNC]], i32 [[INDEX]] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[DEC]] = add nsw i64 [[DEC_IV]], -1 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LOOP_COND:%.*]] = icmp sgt i64 [[DEC_IV]], 1 | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]] | ||
; CHECK-REV-MIN-VW4-IL2: [[__CRIT_EDGE_LOOPEXIT:.*:]] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ [[SELECT]], %[[LOOP]] ] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: br [[DOT_CRIT_EDGE]] | ||
; CHECK-REV-MIN-VW4-IL2: [[__CRIT_EDGE:.*:]] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LAST_INDEX_RET:%.*]] = phi i32 [ [[LAST_INDEX]], %[[ENTRY]] ], [ [[SELECT_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ] | ||
; CHECK-REV-MIN-VW4-IL2-NEXT: ret i32 [[LAST_INDEX_RET]] | ||
; | ||
entry: | ||
%first_index_sext = sext i32 %first_index to i64 | ||
%last_index_neg = sub i32 0, %last_index | ||
%last_index_neg_sext = sext i32 %last_index_neg to i64 | ||
%add = add nsw i64 %first_index_sext, %last_index_neg_sext | ||
%diff = sub nsw i64 0, %add | ||
%first_ptr = getelementptr i8, ptr %array, i64 -8 | ||
%second_ptr = getelementptr i8, ptr %array, i64 -4 | ||
%early_exit_cond = icmp slt i64 %add, 0 | ||
br i1 %early_exit_cond, label %loop.preheader, label %._crit_edge | ||
|
||
loop.preheader: ; preds = %entry | ||
%last_index_sext = sext i32 %last_index to i64 | ||
br label %loop | ||
|
||
loop: ; preds = %loop.preheader, %loop | ||
%iv = phi i64 [%iv.next, %loop], [ %last_index_sext, %loop.preheader ] | ||
%dec_iv = phi i64 [ %dec, %loop ], [ %diff, %loop.preheader ] | ||
%index = phi i32 [ %select, %loop ], [ %last_index, %loop.preheader ] | ||
%iv.next = add nsw i64 %iv, -1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please see Fortran source code. The source has reverse-counting loop thus we it in IR; unfortunately we can't generalize this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, then I think this test is in the wrong directory. If there is no intention of anyone doing work to loop-vectorise this in the near future then it shouldn't live here. |
||
%load1_ptr = getelementptr float, ptr %first_ptr, i64 %iv | ||
%load1 = load float, ptr %load1_ptr, align 4 | ||
%index_sext = sext i32 %index to i64 | ||
%load2_ptr = getelementptr float, ptr %second_ptr, i64 %index_sext | ||
%load2 = load float, ptr %load2_ptr, align 4 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with @Mel-Chen, this looks very difficult to loop-vectorise because the pointer depends upon a value loaded from the previous iteration. Surely, the root problem here is that LoadPRE in an earlier IR pass has failed to spot this recurrence and keep a copy of the last loaded value corresponding to
not only would the scalar version be faster, but it might give the loop vectoriser a better chance of using existing min/max reduction infrastructure to vectorise it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps I'm missing some critical limitation here, but it feels like the focus we should be trying to optimise the scalar IR before we attempt vectorisation. There are no stores in the loop and the bounds seem to be well-known. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, you are right. My first attempt was to fix this in GVN. Indeed, GCC handles this in GVN pass. A bit more context, GCC employs very sophisticated GVN algorithm based on Availability and anticipability. LLVM's current implementation is not mature and is based on an older algorithm from literature. I had started Discourse post here for bigger context. https://discourse.llvm.org/t/newgvn-gvn-pre-plans/84746 Considering this, LoopIdiomVectorize pass seems most suitable to me. You can also find our previous discussion on Discourse here. https://discourse.llvm.org/t/vectorizing-min-max-reduction-pattern/85766 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the recurrence pattern can be detected/optimized in LoopIdiomRecognize, what prevents us to detect/optimize the scalar code in isolation? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently the limitation in GVN (if we go by what GCC does). GVN is GCC is much more sophisticated than what LLVM has and improving LLVM's GVN is a large effort. However, having said that, optimizing scalar code would lead to 10-12% gain - as we would eliminate just the PRE case, vectorization leads to 4x speedup on an average. |
||
%cmp = fcmp contract olt float %load1, %load2 | ||
%iv.next.trunc = trunc nsw i64 %iv.next to i32 | ||
%select = select i1 %cmp, i32 %iv.next.trunc, i32 %index | ||
%dec = add nsw i64 %dec_iv, -1 | ||
%loop_cond = icmp sgt i64 %dec_iv, 1 | ||
br i1 %loop_cond, label %loop, label %._crit_edge | ||
|
||
._crit_edge: ; preds = %loop, %entry | ||
%last_index_ret = phi i32 [ %select, %loop ], [ %last_index, %entry ] | ||
ret i32 %last_index_ret | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this testing a common pattern from Fortran, i.e. minloc that looks for the location of the minimum value in an array? I just want to understand if this is something very common to Fortran in general, or for one very specific case in a particular benchmark? If it's the former, then I think it makes sense to create more general, hand-written tests that demonstrate minloc in a variety of situations, i.e. different array types, etc.