Skip to content

Commit c416341

Browse files
vmustya authored and igcbot committed
Adjust BFN matching heuristics for pattern matching
VC shouldn't match non-profitable patterns. A flag register cannot be a source operand of the BFN instruction, so the finalizer emits extra MOV instructions in such cases. It is therefore better to avoid matching these patterns when the total number of instructions is not reduced. We also need to lower the use threshold for instructions folded into the BFN. This will shorten value live ranges and reduce register pressure.
1 parent 4bbc4cf commit c416341

File tree

2 files changed

+51
-23
lines changed

2 files changed

+51
-23
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp

Lines changed: 30 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -242,11 +242,11 @@ bool GenXPatternMatch::runOnFunction(Function &F) {
242242
Changed |= reassociateIntegerMad(&F);
243243
}
244244

245+
visit(F);
246+
245247
if (EnableBfnMatcher && ST->hasAdd3Bfn())
246248
matchBFN(F);
247249

248-
visit(F);
249-
250250
if (Kind == PatternMatchKind::PreLegalization) {
251251
Changed |= placeConstants(&F);
252252
Changed |= vectorizeConstants(&F);
@@ -396,7 +396,7 @@ class BfnMatcher {
396396
static constexpr StringRef OpNames[] = {"not", "and", "or", "xor"};
397397
static constexpr unsigned LutValues[] = {0xaa, 0xcc, 0xf0};
398398

399-
static constexpr unsigned UsesThreshold = 4;
399+
static constexpr unsigned UsesThreshold = 2;
400400
static constexpr unsigned SourceLimit = 3;
401401

402402
public:
@@ -411,7 +411,7 @@ class BfnMatcher {
411411
if (!Ty->isIntOrIntVectorTy(16) && !Ty->isIntOrIntVectorTy(32))
412412
return false;
413413

414-
unsigned MatchedOps = 0;
414+
MatchedOps = 0;
415415
Srcs.insert(MainInst);
416416

417417
// Grow the pattern to find the source operands using a BFS.
@@ -422,7 +422,7 @@ class BfnMatcher {
422422
auto *Inst = Queue.front();
423423
Queue.pop();
424424

425-
if (Inst->hasNUsesOrMore(UsesThreshold))
425+
if (MatchedOps > 0 && Inst->hasNUsesOrMore(UsesThreshold))
426426
return false;
427427

428428
auto Op = getOperation(Inst);
@@ -536,6 +536,11 @@ class BfnMatcher {
536536
SrcsOrdered[2] = NegCSrc2;
537537
}
538538

539+
if (!isProfitable()) {
540+
LLVM_DEBUG(dbgs() << "BFN: Not profitable\n");
541+
return false;
542+
}
543+
539544
IRBuilder<> Builder(MainInst);
540545

541546
auto Lut = getLutValue(MainInst);
@@ -643,10 +648,30 @@ class BfnMatcher {
643648
return false;
644649
}
645650

651+
static bool isFlagInput(Value *V) {
652+
auto *Cast = dyn_cast<BitCastInst>(V);
653+
if (!Cast)
654+
return false;
655+
656+
auto *Src = Cast->getOperand(0);
657+
auto *SrcTy = Src->getType();
658+
return SrcTy->isIntOrIntVectorTy(1);
659+
}
660+
661+
bool isProfitable() const {
662+
unsigned NumOfFlagInputs = llvm::count_if(SrcsOrdered, isFlagInput);
663+
if (NumOfFlagInputs >= MatchedOps)
664+
return false;
665+
666+
return true;
667+
}
668+
646669
BinaryOperator *MainInst;
647670
const bool TryGreedy;
648671
SmallSetVector<Value *, 4> Srcs;
649672
SmallVector<Value *, 4> SrcsOrdered;
673+
674+
unsigned MatchedOps = 0;
650675
};
651676

652677
// Class to identify cases where a comparison and select are equivalent to a

IGC/VectorCompiler/test/PatternMatch/bfn_match.ll

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -108,24 +108,27 @@ define i32 @test_match_i32_combine_by_mask_const(i32 %a, i32 %b) {
108108
ret i32 %3
109109
}
110110

111-
define i32 @test_match_i32_const_zero(i32 %a, i32 %b, i32 %mask) {
112-
; CHECK: ret i32 0
113-
%nmask = xor i32 -1, %mask
114-
%1 = and i32 %a, %mask
115-
%2 = and i32 %b, %nmask
116-
%3 = or i32 %1, %2
117-
%4 = xor i32 %3, -1
118-
%5 = and i32 %3, %4
119-
ret i32 %5
111+
declare void @use(i32)
112+
113+
; CHECK-LABEL: @test_unmatch_i32(
114+
define i32 @test_unmatch_i32(i32 %op0, i32 %op1, i32 %op2) {
115+
; CHECK-NOT: call void @llvm.genx.bfn.i32.i32(
116+
%1 = and i32 %op0, %op1
117+
%2 = and i32 %1, %op2
118+
call void @use(i32 %1)
119+
120+
ret i32 %2
120121
}
121122

122-
define i32 @test_match_i32_const_one(i32 %a, i32 %b, i32 %mask) {
123-
; CHECK: ret i32 -1
124-
%nmask = xor i32 -1, %mask
125-
%1 = and i32 %a, %mask
126-
%2 = and i32 %b, %nmask
127-
%3 = or i32 %1, %2
128-
%4 = xor i32 %3, -1
129-
%5 = or i32 %3, %4
130-
ret i32 %5
123+
; CHECK-LABEL: @test_unmatch_flag
124+
define i32 @test_unmatch_flag(<32 x i1> %a, <32 x i1> %b, <32 x i1> %c) {
125+
%as = bitcast <32 x i1> %a to i32
126+
%bs = bitcast <32 x i1> %b to i32
127+
%cs = bitcast <32 x i1> %c to i32
128+
129+
; CHECK-NOT: call i32 @llvm.genx.bfn.i32.i32(
130+
%1 = and i32 %as, %bs
131+
%2 = and i32 %bs, %cs
132+
133+
ret i32 %2
131134
}

0 commit comments

Comments
 (0)