Commit bc22572
[SLP] Enable optimization of freeze instructions
Allow SLP vectorization to proceed in the presence of freeze instructions. Prior to this commit, freeze instructions blocked SLP vectorization. Correctness of the transformation in the addsub_freeze test is shown here: https://alive2.llvm.org/ce/z/qm38oh
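For illustration only (not part of the commit), the sketch below shows the kind of scalar pattern this change makes vectorizable: two adjacent scalar freezes feeding adjacent stores, which SLP can now collapse into a single vector freeze (cost model permitting), in the same spirit as the fmuladd_2f64_freeze test further down. The function and value names are hypothetical.

; Hypothetical input, not taken from this patch.
define void @freeze_pair(ptr %src, ptr %dst) {
  %src1 = getelementptr inbounds double, ptr %src, i64 1
  %a0 = load double, ptr %src, align 8
  %a1 = load double, ptr %src1, align 8
  %f0 = freeze double %a0
  %f1 = freeze double %a1
  %dst1 = getelementptr inbounds double, ptr %dst, i64 1
  store double %f0, ptr %dst, align 8
  store double %f1, ptr %dst1, align 8
  ret void
}
; After SLP (sketch): one <2 x double> load per source, a single vector freeze,
; and a single <2 x double> store.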

3 files changed: +42 −52 lines changed
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 25 additions & 4 deletions
@@ -6468,6 +6468,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
+  case Instruction::Freeze:
     return TreeEntry::Vectorize;
   case Instruction::GetElementPtr: {
     // We don't combine GEPs with complicated (nested) indexing.
@@ -7305,7 +7306,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
   case Instruction::AShr:
   case Instruction::And:
   case Instruction::Or:
-  case Instruction::Xor: {
+  case Instruction::Xor:
+  case Instruction::Freeze: {
     TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                                  ReuseShuffleIndices);
     LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
@@ -9790,10 +9792,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
   case Instruction::AShr:
   case Instruction::And:
   case Instruction::Or:
-  case Instruction::Xor: {
+  case Instruction::Xor:
+  case Instruction::Freeze: {
     auto GetScalarCost = [&](unsigned Idx) {
       auto *VI = cast<Instruction>(UniqueValues[Idx]);
-      unsigned OpIdx = isa<UnaryOperator>(VI) ? 0 : 1;
+      unsigned OpIdx = isa<UnaryOperator>(VI) || isa<FreezeInst>(VI) ? 0 : 1;
       TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(VI->getOperand(0));
       TTI::OperandValueInfo Op2Info =
           TTI::getOperandInfo(VI->getOperand(OpIdx));
@@ -9812,7 +9815,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
           return CommonCost;
         }
       }
-      unsigned OpIdx = isa<UnaryOperator>(VL0) ? 0 : 1;
+      unsigned OpIdx = isa<UnaryOperator>(VL0) || isa<FreezeInst>(VL0) ? 0 : 1;
       TTI::OperandValueInfo Op1Info = getOperandInfo(E->getOperand(0));
       TTI::OperandValueInfo Op2Info = getOperandInfo(E->getOperand(OpIdx));
       return TTI->getArithmeticInstrCost(ShuffleOrOp, VecTy, CostKind, Op1Info,
@@ -13298,6 +13301,24 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
 
     return V;
   }
+  case Instruction::Freeze: {
+    setInsertPointAfterBundle(E);
+
+    Value *Op = vectorizeOperand(E, 0, PostponedPHIs);
+
+    if (E->VectorizedValue) {
+      LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+      return E->VectorizedValue;
+    }
+
+    Value *V = Builder.CreateFreeze(Op);
+    V = FinalShuffle(V, E, VecTy);
+
+    E->VectorizedValue = V;
+    ++NumVectorInstructions;
+
+    return V;
+  }
   case Instruction::Add:
   case Instruction::FAdd:
   case Instruction::Sub:

llvm/test/Transforms/SLPVectorizer/X86/addsub.ll

Lines changed: 11 additions & 36 deletions
@@ -68,42 +68,17 @@ entry:
 define void @addsub_freeze() #0 {
 ; CHECK-LABEL: @addsub_freeze(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @b, align 4
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @c, align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @d, align 4
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @e, align 4
-; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[ADD1]]
-; CHECK-NEXT: [[FREEZE_ADD2:%.*]] = freeze i32 [[ADD2]]
-; CHECK-NEXT: store i32 [[FREEZE_ADD2]], ptr @a, align 4
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 1), align 4
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 1), align 4
-; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 1), align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 1), align 4
-; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[ADD3]], [[ADD4]]
-; CHECK-NEXT: [[FREEZE_SUB:%.*]] = freeze i32 [[SUB]]
-; CHECK-NEXT: store i32 [[FREEZE_SUB]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 1), align 4
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 2), align 4
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 2), align 4
-; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 2), align 4
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 2), align 4
-; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[ADD6]]
-; CHECK-NEXT: [[FREEZE_ADD7:%.*]] = freeze i32 [[ADD7]]
-; CHECK-NEXT: store i32 [[FREEZE_ADD7]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 2), align 4
-; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 3), align 4
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 3), align 4
-; CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 3), align 4
-; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 3), align 4
-; CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[SUB10:%.*]] = sub nsw i32 [[ADD8]], [[ADD9]]
-; CHECK-NEXT: [[FREEZE_SUB10:%.*]] = freeze i32 [[SUB10]]
-; CHECK-NEXT: store i32 [[FREEZE_SUB10]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 3), align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @c, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @d, align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr @e, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = freeze <4 x i32> [[TMP8]]
+; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @a, align 4
 ; CHECK-NEXT: ret void
 ;
 entry:

llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll

Lines changed: 6 additions & 12 deletions
@@ -48,18 +48,12 @@ define void @fmuladd_2f64() #0 {
 
 define void @fmuladd_2f64_freeze() #0 {
 ; CHECK-LABEL: @fmuladd_2f64_freeze(
-; CHECK-NEXT: [[A0:%.*]] = load double, ptr @srcA64, align 8
-; CHECK-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[B0:%.*]] = load double, ptr @srcB64, align 8
-; CHECK-NEXT: [[B1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[C0:%.*]] = load double, ptr @srcC64, align 8
-; CHECK-NEXT: [[C1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[FMULADD0:%.*]] = call double @llvm.fmuladd.f64(double [[A0]], double [[B0]], double [[C0]])
-; CHECK-NEXT: [[FMULADD1:%.*]] = call double @llvm.fmuladd.f64(double [[A1]], double [[B1]], double [[C1]])
-; CHECK-NEXT: [[FREEZE0:%.*]] = freeze double [[FMULADD0]]
-; CHECK-NEXT: [[FREEZE1:%.*]] = freeze double [[FMULADD1]]
-; CHECK-NEXT: store double [[FREEZE0]], ptr @dst64, align 8
-; CHECK-NEXT: store double [[FREEZE1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr @srcC64, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <2 x double> [[TMP4]]
+; CHECK-NEXT: store <2 x double> [[TMP5]], ptr @dst64, align 8
 ; CHECK-NEXT: ret void
 ;
 %a0 = load double, ptr @srcA64, align 8
