Skip to content

Commit 39913ae

Browse files
authored
[SLP][REVEC] Make reorderTopToBottom support ShuffleVectorInst. (#117310)
We don't want reorderTopToBottom to reorder ShuffleVectorInst nodes themselves, because ShuffleVectorInst currently supports only a limited set of patterns. There are two ways to handle this: either make ShuffleVectorInst support more patterns, or let ReorderIndices reorder the result of vectorizing the ShuffleVectorInst. This commit takes the latter approach.
1 parent 4cc2785 commit 39913ae

File tree

2 files changed

+41
-2
lines changed

2 files changed

+41
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6017,8 +6017,9 @@ void BoUpSLP::reorderTopToBottom() {
60176017
}
60186018
if ((TE->State == TreeEntry::Vectorize ||
60196019
TE->State == TreeEntry::StridedVectorize) &&
6020-
isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst,
6021-
InsertElementInst>(TE->getMainOp())) {
6020+
(isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst,
6021+
InsertElementInst>(TE->getMainOp()) ||
6022+
(SLPReVec && isa<ShuffleVectorInst>(TE->getMainOp())))) {
60226023
assert(!TE->isAltShuffle() &&
60236024
"Alternate instructions are only supported by BinaryOperator "
60246025
"and CastInst.");

llvm/test/Transforms/SLPVectorizer/revec.ll

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,3 +409,41 @@ entry:
409409
%23 = fcmp ogt <8 x float> zeroinitializer, %19
410410
ret void
411411
}
412+
413+
define void @test13(<8 x i32> %0, ptr %out0, ptr %out1, ptr %out2) {
414+
; CHECK-LABEL: @test13(
415+
; CHECK-NEXT: entry:
416+
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> poison, <8 x i32> [[TMP0:%.*]], i64 0)
417+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
418+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
419+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
420+
; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]]
421+
; CHECK: for.end.loopexit:
422+
; CHECK-NEXT: [[TMP5:%.*]] = phi <16 x i32> [ [[TMP4]], [[ENTRY:%.*]] ]
423+
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP5]], i64 12)
424+
; CHECK-NEXT: [[OR0:%.*]] = or <4 x i32> [[TMP6]], zeroinitializer
425+
; CHECK-NEXT: store <4 x i32> [[OR0]], ptr [[OUT0:%.*]], align 4
426+
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 0)
427+
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[OUT1:%.*]], align 4
428+
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 8)
429+
; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[OUT2:%.*]], align 4
430+
; CHECK-NEXT: ret void
431+
;
432+
entry:
433+
%1 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
434+
%2 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
435+
%3 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
436+
%4 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
437+
br label %for.end.loopexit
438+
439+
for.end.loopexit:
440+
%phi0 = phi <4 x i32> [ %1, %entry ]
441+
%phi1 = phi <4 x i32> [ %2, %entry ]
442+
%phi2 = phi <4 x i32> [ %3, %entry ]
443+
%phi3 = phi <4 x i32> [ %4, %entry ]
444+
%or0 = or <4 x i32> %phi1, zeroinitializer
445+
store <4 x i32> %or0, ptr %out0, align 4
446+
store <4 x i32> %1, ptr %out1, align 4
447+
store <4 x i32> %4, ptr %out2, align 4
448+
ret void
449+
}

0 commit comments

Comments
 (0)