Skip to content

Commit 1f6e124

Browse files
committed
[SLP] Add Preheader to CSE blocks after hoisting CSE-able instrs.
Adding the pre-header to CSEBlocks ensures instructions are CSE'd even after hoisting. This was originally discovered by @atrick a while ago. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D133649 (cherry-picked from 3fd1cc2)
1 parent 36b07da commit 1f6e124

File tree

2 files changed

+8
-9
lines changed

2 files changed

+8
-9
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7641,6 +7641,7 @@ void BoUpSLP::optimizeGatherSequence() {
76417641

76427642
// We can hoist this instruction. Move it to the pre-header.
76437643
I->moveBefore(PreHeader->getTerminator());
7644+
CSEBlocks.insert(PreHeader);
76447645
}
76457646

76467647
// Make a list of all reachable blocks in our CSE queue.

llvm/test/Transforms/SLPVectorizer/X86/cse.ll

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -353,19 +353,17 @@ define void @cse_for_hoisted_instructions_in_preheader(i32* %dst, i32 %a, i1 %c)
353353
; CHECK-NEXT: entry:
354354
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0
355355
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[A]], i32 1
356-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0
357-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[A]], i32 1
358356
; CHECK-NEXT: br label [[LOOP:%.*]]
359357
; CHECK: loop:
360-
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> <i32 22, i32 22>, [[TMP1]]
358+
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> <i32 22, i32 22>, [[TMP1]]
361359
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 0
362-
; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP4]], <i32 3, i32 3>
363-
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[GEP_0]] to <2 x i32>*
364-
; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 4
365-
; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i32> [[TMP3]], <i32 3, i32 3>
360+
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], <i32 3, i32 3>
361+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[GEP_0]] to <2 x i32>*
362+
; CHECK-NEXT: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP4]], align 4
363+
; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP1]], <i32 3, i32 3>
366364
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 10
367-
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[GEP_2]] to <2 x i32>*
368-
; CHECK-NEXT: store <2 x i32> [[TMP7]], <2 x i32>* [[TMP8]], align 4
365+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[GEP_2]] to <2 x i32>*
366+
; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 4
369367
; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
370368
; CHECK: exit:
371369
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)