Commit bb82c79

[SLP] Enable optimization of freeze instructions (#102217)
Allow SLP vectorization to proceed in the presence of freeze instructions. Prior to this commit, freeze instructions blocked SLP optimization. The following Alive2 proof demonstrates the correctness of the transformation in the addsub_freeze test: https://alive2.llvm.org/ce/z/qm38oh
1 parent 172ccfe commit bb82c79
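
To illustrate the effect of the change, here is a minimal sketch (not taken from the patch; the function and value names are hypothetical) of the kind of pattern that can now be vectorized: adjacent scalar freezes feeding adjacent stores are widened into a single vector freeze, as the updated tests below check.

; Before: the scalar freezes previously stopped SLP from forming a bundle.
define void @freeze_pair(ptr %src, ptr %dst) {
  %a0 = load double, ptr %src, align 8
  %p1 = getelementptr inbounds double, ptr %src, i64 1
  %a1 = load double, ptr %p1, align 8
  %f0 = freeze double %a0
  %f1 = freeze double %a1
  store double %f0, ptr %dst, align 8
  %q1 = getelementptr inbounds double, ptr %dst, i64 1
  store double %f1, ptr %q1, align 8
  ret void
}
; After this commit, SLP can emit roughly:
;   %v  = load <2 x double>, ptr %src, align 8
;   %fv = freeze <2 x double> %v
;   store <2 x double> %fv, ptr %dst, align 8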

3 files changed: +40 -49 lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 23 additions & 1 deletion
@@ -6493,6 +6493,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
+  case Instruction::Freeze:
     return TreeEntry::Vectorize;
   case Instruction::GetElementPtr: {
     // We don't combine GEPs with complicated (nested) indexing.
@@ -7330,7 +7331,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
   case Instruction::AShr:
   case Instruction::And:
   case Instruction::Or:
-  case Instruction::Xor: {
+  case Instruction::Xor:
+  case Instruction::Freeze: {
     TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                                  ReuseShuffleIndices);
     LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
@@ -10118,6 +10120,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
     };
     return GetCostDiff(GetScalarCost, GetVectorCost);
   }
+  case Instruction::Freeze:
+    return CommonCost;
   default:
     llvm_unreachable("Unknown instruction");
   }
@@ -13390,6 +13394,24 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
 
     return V;
   }
+  case Instruction::Freeze: {
+    setInsertPointAfterBundle(E);
+
+    Value *Op = vectorizeOperand(E, 0, PostponedPHIs);
+
+    if (E->VectorizedValue) {
+      LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+      return E->VectorizedValue;
+    }
+
+    Value *V = Builder.CreateFreeze(Op);
+    V = FinalShuffle(V, E, VecTy);
+
+    E->VectorizedValue = V;
+    ++NumVectorInstructions;
+
+    return V;
+  }
   case Instruction::Add:
   case Instruction::FAdd:
   case Instruction::Sub:

llvm/test/Transforms/SLPVectorizer/X86/addsub.ll

Lines changed: 11 additions & 36 deletions
@@ -68,42 +68,17 @@ entry:
 define void @addsub_freeze() #0 {
 ; CHECK-LABEL: @addsub_freeze(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @b, align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @c, align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @d, align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr @e, align 4
-; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[ADD]], [[ADD1]]
-; CHECK-NEXT:    [[FREEZE_ADD2:%.*]] = freeze i32 [[ADD2]]
-; CHECK-NEXT:    store i32 [[FREEZE_ADD2]], ptr @a, align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[ADD3]], [[ADD4]]
-; CHECK-NEXT:    [[FREEZE_SUB:%.*]] = freeze i32 [[SUB]]
-; CHECK-NEXT:    store i32 [[FREEZE_SUB]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT:    [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[ADD6]]
-; CHECK-NEXT:    [[FREEZE_ADD7:%.*]] = freeze i32 [[ADD7]]
-; CHECK-NEXT:    store i32 [[FREEZE_ADD7]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT:    [[SUB10:%.*]] = sub nsw i32 [[ADD8]], [[ADD9]]
-; CHECK-NEXT:    [[FREEZE_SUB10:%.*]] = freeze i32 [[SUB10]]
-; CHECK-NEXT:    store i32 [[FREEZE_SUB10]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @c, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr @d, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr @e, align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP9:%.*]] = freeze <4 x i32> [[TMP8]]
+; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr @a, align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
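
In the vectorized CHECK lines above, shufflevector indices 0-3 select lanes from the first operand (the add vector) and 4-7 from the second (the sub vector), so the <0, 5, 2, 7> mask reproduces the scalar add/sub/add/sub lane pattern before a single vector freeze replaces the four scalar freezes. A small stand-alone illustration of that mask (hypothetical function, not part of the test):

; Blend lanes 0 and 2 from %adds with lanes 1 and 3 from %subs, then freeze the result.
define <4 x i32> @blend_addsub(<4 x i32> %adds, <4 x i32> %subs) {
  %blend = shufflevector <4 x i32> %adds, <4 x i32> %subs, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %frozen = freeze <4 x i32> %blend
  ret <4 x i32> %frozen
}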

llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll

Lines changed: 6 additions & 12 deletions
@@ -48,18 +48,12 @@ define void @fmuladd_2f64() #0 {
 
 define void @fmuladd_2f64_freeze() #0 {
 ; CHECK-LABEL: @fmuladd_2f64_freeze(
-; CHECK-NEXT:    [[A0:%.*]] = load double, ptr @srcA64, align 8
-; CHECK-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[B0:%.*]] = load double, ptr @srcB64, align 8
-; CHECK-NEXT:    [[B1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[C0:%.*]] = load double, ptr @srcC64, align 8
-; CHECK-NEXT:    [[C1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[FMULADD0:%.*]] = call double @llvm.fmuladd.f64(double [[A0]], double [[B0]], double [[C0]])
-; CHECK-NEXT:    [[FMULADD1:%.*]] = call double @llvm.fmuladd.f64(double [[A1]], double [[B1]], double [[C1]])
-; CHECK-NEXT:    [[FREEZE0:%.*]] = freeze double [[FMULADD0]]
-; CHECK-NEXT:    [[FREEZE1:%.*]] = freeze double [[FMULADD1]]
-; CHECK-NEXT:    store double [[FREEZE0]], ptr @dst64, align 8
-; CHECK-NEXT:    store double [[FREEZE1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr @srcC64, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = freeze <2 x double> [[TMP4]]
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr @dst64, align 8
 ; CHECK-NEXT:    ret void
 ;
 %a0 = load double, ptr @srcA64, align 8
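
The single vector freeze in the CHECK lines above is sound because freeze applies to every lane of a vector independently, which is the property the Alive2 link in the commit message verifies for the addsub_freeze transformation. A tiny sketch of the per-lane view (hypothetical function, not part of the test):

; Extracting a lane of a frozen vector behaves like freezing that lane's scalar value.
define double @frozen_lane0(<2 x double> %v) {
  %fv = freeze <2 x double> %v
  %l0 = extractelement <2 x double> %fv, i64 0
  ret double %l0
}
; ...which matches the scalar form:
;   %s0 = extractelement <2 x double> %v, i64 0
;   %f0 = freeze double %s0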
