Commit f568419

[SLP] Enable optimization of freeze instructions
Allow SLP vectorization to proceed in the presence of freeze instructions. Prior to this commit, freeze instructions blocked SLP vectorization. The following Alive2 proof shows the correctness of the addsub_freeze transformation: https://alive2.llvm.org/ce/z/qm38oh
1 parent: 445023f
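For context, here is a minimal illustration of the kind of scalar IR that previously stayed scalar only because of the freeze instructions. This snippet is distilled from the fmuladd_2f64_freeze test below; the function and value names are illustrative and not taken from the patch:

; Two adjacent loads, frozen and stored to adjacent slots. Before this commit
; the freezes stopped SLP; now the pair can become a <2 x double> load, one
; vector freeze, and a <2 x double> store.
define void @freeze_pair(ptr %src, ptr %dst) {
  %p1 = getelementptr inbounds double, ptr %src, i64 1
  %a0 = load double, ptr %src, align 8
  %a1 = load double, ptr %p1, align 8
  %f0 = freeze double %a0
  %f1 = freeze double %a1
  %d1 = getelementptr inbounds double, ptr %dst, i64 1
  store double %f0, ptr %dst, align 8
  store double %f1, ptr %d1, align 8
  ret void
}

The updated CHECK lines in the two test files below show the analogous vector output for the real tests.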

3 files changed: +42 −52 lines

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 25 additions & 4 deletions
@@ -6493,6 +6493,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
+  case Instruction::Freeze:
     return TreeEntry::Vectorize;
   case Instruction::GetElementPtr: {
     // We don't combine GEPs with complicated (nested) indexing.
@@ -7330,7 +7331,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
   case Instruction::AShr:
   case Instruction::And:
   case Instruction::Or:
-  case Instruction::Xor: {
+  case Instruction::Xor:
+  case Instruction::Freeze: {
     TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                                  ReuseShuffleIndices);
     LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
@@ -9863,10 +9865,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
   case Instruction::AShr:
   case Instruction::And:
   case Instruction::Or:
-  case Instruction::Xor: {
+  case Instruction::Xor:
+  case Instruction::Freeze: {
     auto GetScalarCost = [&](unsigned Idx) {
       auto *VI = cast<Instruction>(UniqueValues[Idx]);
-      unsigned OpIdx = isa<UnaryOperator>(VI) ? 0 : 1;
+      unsigned OpIdx = isa<UnaryOperator>(VI) || isa<FreezeInst>(VI) ? 0 : 1;
       TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(VI->getOperand(0));
       TTI::OperandValueInfo Op2Info =
           TTI::getOperandInfo(VI->getOperand(OpIdx));
@@ -9885,7 +9888,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       return CommonCost;
     }
   }
-  unsigned OpIdx = isa<UnaryOperator>(VL0) ? 0 : 1;
+  unsigned OpIdx = isa<UnaryOperator>(VL0) || isa<FreezeInst>(VL0) ? 0 : 1;
   TTI::OperandValueInfo Op1Info = getOperandInfo(E->getOperand(0));
   TTI::OperandValueInfo Op2Info = getOperandInfo(E->getOperand(OpIdx));
   return TTI->getArithmeticInstrCost(ShuffleOrOp, VecTy, CostKind, Op1Info,
@@ -13390,6 +13393,24 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
 
     return V;
   }
+  case Instruction::Freeze: {
+    setInsertPointAfterBundle(E);
+
+    Value *Op = vectorizeOperand(E, 0, PostponedPHIs);
+
+    if (E->VectorizedValue) {
+      LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+      return E->VectorizedValue;
+    }
+
+    Value *V = Builder.CreateFreeze(Op);
+    V = FinalShuffle(V, E, VecTy);
+
+    E->VectorizedValue = V;
+    ++NumVectorInstructions;
+
+    return V;
+  }
   case Instruction::Add:
   case Instruction::FAdd:
   case Instruction::Sub:
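Two details of the change above are worth noting. In the cost model, FreezeInst is not a UnaryOperator (that class covers fneg), yet it has only one operand, so the added isa<FreezeInst> check keeps OpIdx at 0 and avoids asking for a second operand that does not exist. In code generation, freeze is defined on vector types and freezes each element, so Builder.CreateFreeze on the already-vectorized operand is the entire lowering. A minimal IR sketch of both points (illustrative, not part of the patch):

; freeze takes exactly one operand, unlike the binary ops that share this
; switch case, and it is legal directly on vector values.
define <4 x i32> @freeze_v4i32(<4 x i32> %x) {
  %f = freeze <4 x i32> %x
  ret <4 x i32> %f
}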

llvm/test/Transforms/SLPVectorizer/X86/addsub.ll

Lines changed: 11 additions & 36 deletions
@@ -68,42 +68,17 @@ entry:
 define void @addsub_freeze() #0 {
 ; CHECK-LABEL: @addsub_freeze(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @b, align 4
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @c, align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @d, align 4
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @e, align 4
-; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[ADD1]]
-; CHECK-NEXT: [[FREEZE_ADD2:%.*]] = freeze i32 [[ADD2]]
-; CHECK-NEXT: store i32 [[FREEZE_ADD2]], ptr @a, align 4
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 1), align 4
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 1), align 4
-; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 1), align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 1), align 4
-; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[ADD3]], [[ADD4]]
-; CHECK-NEXT: [[FREEZE_SUB:%.*]] = freeze i32 [[SUB]]
-; CHECK-NEXT: store i32 [[FREEZE_SUB]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 1), align 4
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 2), align 4
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 2), align 4
-; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 2), align 4
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 2), align 4
-; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[ADD6]]
-; CHECK-NEXT: [[FREEZE_ADD7:%.*]] = freeze i32 [[ADD7]]
-; CHECK-NEXT: store i32 [[FREEZE_ADD7]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 2), align 4
-; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 3), align 4
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 3), align 4
-; CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 3), align 4
-; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 3), align 4
-; CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[SUB10:%.*]] = sub nsw i32 [[ADD8]], [[ADD9]]
-; CHECK-NEXT: [[FREEZE_SUB10:%.*]] = freeze i32 [[SUB10]]
-; CHECK-NEXT: store i32 [[FREEZE_SUB10]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 3), align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @c, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @d, align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr @e, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = freeze <4 x i32> [[TMP8]]
+; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @a, align 4
 ; CHECK-NEXT: ret void
 ;
 entry:
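The updated CHECK lines keep the test's existing add/sub interleave: the shufflevector mask <0, 5, 2, 7> takes lanes 0 and 2 from the add result and lanes 1 and 3 from the sub result, and the four scalar freezes collapse into a single freeze of the blended vector. A hypothetical two-lane version of the same shape (not part of the test file) makes the pattern easier to read:

define <2 x i32> @addsub_freeze_2(<2 x i32> %x, <2 x i32> %y) {
  %add = add nsw <2 x i32> %x, %y
  %sub = sub nsw <2 x i32> %x, %y
  ; lane 0 comes from %add, lane 1 (mask index 3) from %sub
  %blend = shufflevector <2 x i32> %add, <2 x i32> %sub, <2 x i32> <i32 0, i32 3>
  %frozen = freeze <2 x i32> %blend
  ret <2 x i32> %frozen
}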

llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll

Lines changed: 6 additions & 12 deletions
@@ -48,18 +48,12 @@ define void @fmuladd_2f64() #0 {
 
 define void @fmuladd_2f64_freeze() #0 {
 ; CHECK-LABEL: @fmuladd_2f64_freeze(
-; CHECK-NEXT: [[A0:%.*]] = load double, ptr @srcA64, align 8
-; CHECK-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[B0:%.*]] = load double, ptr @srcB64, align 8
-; CHECK-NEXT: [[B1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[C0:%.*]] = load double, ptr @srcC64, align 8
-; CHECK-NEXT: [[C1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[FMULADD0:%.*]] = call double @llvm.fmuladd.f64(double [[A0]], double [[B0]], double [[C0]])
-; CHECK-NEXT: [[FMULADD1:%.*]] = call double @llvm.fmuladd.f64(double [[A1]], double [[B1]], double [[C1]])
-; CHECK-NEXT: [[FREEZE0:%.*]] = freeze double [[FMULADD0]]
-; CHECK-NEXT: [[FREEZE1:%.*]] = freeze double [[FMULADD1]]
-; CHECK-NEXT: store double [[FREEZE0]], ptr @dst64, align 8
-; CHECK-NEXT: store double [[FREEZE1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr @srcC64, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <2 x double> [[TMP4]]
+; CHECK-NEXT: store <2 x double> [[TMP5]], ptr @dst64, align 8
 ; CHECK-NEXT: ret void
 ;
 %a0 = load double, ptr @srcA64, align 8
