Skip to content

Commit 9029e6e

Browse files
frasercrmck and alexey-bataev
authored and committed
[libclc] Refactor build system to allow in-tree builds (#87622)
The previous build system was adding custom "OpenCL" and "LLVM IR" languages in CMake to build the builtin libraries. This was making it harder to build in-tree because the tool binaries needed to be present at configure time. This commit refactors the build system to use custom commands to build the bytecode files one by one, and link them all together into the final bytecode library. It also enables in-tree builds by aliasing the clang/llvm-link/etc. tool targets to internal targets, which are imported from the LLVM installation directory when building out of tree. Diffing (with llvm-diff) all of the final bytecode libraries in an out-of-tree configuration against those built using the current tip system shows no changes. Note that there are textual changes to metadata IDs which confuse regular diff, and that llvm-diff 14 and below may show false-positives. This commit also removes a file listed in one of the SOURCEs which didn't exist and which was preventing the use of ENABLE_RUNTIME_SUBNORMAL when configuring CMake.
1 parent 03f619d commit 9029e6e

File tree

2 files changed

+50
-15
lines changed

2 files changed

+50
-15
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,32 @@ static bool isCommutative(Instruction *I) {
305305
if (auto *Cmp = dyn_cast<CmpInst>(I))
306306
return Cmp->isCommutative();
307307
if (auto *BO = dyn_cast<BinaryOperator>(I))
308-
return BO->isCommutative();
308+
return BO->isCommutative() ||
309+
(BO->getOpcode() == Instruction::Sub &&
310+
!BO->hasNUsesOrMore(UsesLimit) &&
311+
all_of(
312+
BO->uses(),
313+
[](const Use &U) {
314+
// Commutative, if icmp eq/ne sub, 0
315+
ICmpInst::Predicate Pred;
316+
if (match(U.getUser(),
317+
m_ICmp(Pred, m_Specific(U.get()), m_Zero())) &&
318+
(Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE))
319+
return true;
320+
// Commutative, if abs(sub nsw, true) or abs(sub, false).
321+
ConstantInt *Flag;
322+
return match(U.getUser(),
323+
m_Intrinsic<Intrinsic::abs>(
324+
m_Specific(U.get()), m_ConstantInt(Flag))) &&
325+
(!cast<Instruction>(U.get())->hasNoSignedWrap() ||
326+
Flag->isOne());
327+
})) ||
328+
(BO->getOpcode() == Instruction::FSub &&
329+
!BO->hasNUsesOrMore(UsesLimit) &&
330+
all_of(BO->uses(), [](const Use &U) {
331+
return match(U.getUser(),
332+
m_Intrinsic<Intrinsic::fabs>(m_Specific(U.get())));
333+
}));
309334
return I->isCommutative();
310335
}
311336

@@ -6838,7 +6863,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
68386863

68396864
// Sort operands of the instructions so that each side is more likely to
68406865
// have the same opcode.
6841-
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
6866+
if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
68426867
ValueList Left, Right;
68436868
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
68446869
TE->setOperand(0, Left);
@@ -12566,8 +12591,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1256612591
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
1256712592
RHS);
1256812593
propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
12569-
if (auto *I = dyn_cast<Instruction>(V))
12594+
if (auto *I = dyn_cast<Instruction>(V)) {
1257012595
V = propagateMetadata(I, E->Scalars);
12596+
// Drop nuw flags for abs(sub(commutative), true).
12597+
if (!MinBWs.contains(E) && ShuffleOrOp == Instruction::Sub &&
12598+
any_of(E->Scalars, [](Value *V) {
12599+
return isCommutative(cast<Instruction>(V));
12600+
}))
12601+
I->setHasNoUnsignedWrap(/*b=*/false);
12602+
}
1257112603

1257212604
V = FinalShuffle(V, E, VecTy);
1257312605

@@ -12893,6 +12925,19 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1289312925

1289412926
propagateIRFlags(V0, OpScalars, E->getMainOp(), It == MinBWs.end());
1289512927
propagateIRFlags(V1, AltScalars, E->getAltOp(), It == MinBWs.end());
12928+
auto DropNuwFlag = [&](Value *Vec, unsigned Opcode) {
12929+
// Drop nuw flags for abs(sub(commutative), true).
12930+
if (auto *I = dyn_cast<Instruction>(Vec);
12931+
I && Opcode == Instruction::Sub && !MinBWs.contains(E) &&
12932+
any_of(E->Scalars, [](Value *V) {
12933+
auto *I = cast<Instruction>(V);
12934+
return I->getOpcode() == Instruction::Sub &&
12935+
isCommutative(cast<Instruction>(V));
12936+
}))
12937+
I->setHasNoUnsignedWrap(/*b=*/false);
12938+
};
12939+
DropNuwFlag(V0, E->getOpcode());
12940+
DropNuwFlag(V1, E->getAltOpcode());
1289612941

1289712942
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
1289812943
if (auto *I = dyn_cast<Instruction>(V)) {

llvm/test/Transforms/SLPVectorizer/X86/store-abs-minbitwidth.ll

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,9 @@
44

55
define i32 @test(ptr noalias %in, ptr noalias %inn, ptr %out) {
66
; CHECK-LABEL: @test(
7-
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[IN:%.*]], align 1
8-
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 2
9-
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[GEP_2]], align 1
10-
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i8>, ptr [[INN:%.*]], align 1
11-
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i8, ptr [[INN]], i64 2
12-
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i8>, ptr [[GEP_5]], align 1
13-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
14-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
15-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
7+
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[IN:%.*]], align 1
8+
; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[INN:%.*]], align 1
169
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[TMP7]] to <4 x i16>
17-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
18-
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
19-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
2010
; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i8> [[TMP11]] to <4 x i16>
2111
; CHECK-NEXT: [[TMP13:%.*]] = sub <4 x i16> [[TMP12]], [[TMP8]]
2212
; CHECK-NEXT: [[TMP15:%.*]] = call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[TMP13]], i1 false)

0 commit comments

Comments
 (0)