Skip to content

Commit 4a09bac

Browse files
vmustya authored and igcbot committed
Disable predicate promotion to scalar for fused EU in VC
Due to the fused EU hardware restrictions, VC should promote predicate operations into vector operations instead of scalar ones.
1 parent bed3ead commit 4a09bac

File tree

2 files changed

+39
-7
lines changed

2 files changed

+39
-7
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXPromotePredicate.cpp

Lines changed: 22 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,18 +16,22 @@ SPDX-License-Identifier: MIT
1616
///
1717
//===----------------------------------------------------------------------===//
1818

19-
2019
#include "GenX.h"
20+
#include "GenXSubtarget.h"
21+
#include "GenXTargetMachine.h"
2122
#include "GenXUtil.h"
23+
2224
#include "llvm/ADT/EquivalenceClasses.h"
2325
#include "llvm/ADT/Statistic.h"
26+
#include "llvm/CodeGen/TargetPassConfig.h"
2427
#include "llvm/IR/IRBuilder.h"
2528
#include "llvm/IR/InstIterator.h"
29+
#include "llvm/InitializePasses.h"
2630
#include "llvm/Pass.h"
2731

2832
#include "llvmWrapper/IR/DerivedTypes.h"
2933

30-
#define DEBUG_TYPE "GENX_PROMOTE_PREDICATE"
34+
#define DEBUG_TYPE "genx-promote-predicate"
3135

3236
using namespace llvm;
3337
using namespace genx;
@@ -48,6 +52,7 @@ class GenXPromotePredicate : public FunctionPass {
4852
bool runOnFunction(Function &F) override;
4953
StringRef getPassName() const override { return "GenXPromotePredicate"; }
5054
void getAnalysisUsage(AnalysisUsage &AU) const override {
55+
AU.addRequired<TargetPassConfig>();
5156
AU.setPreservesCFG();
5257
}
5358
};
@@ -61,6 +66,7 @@ void initializeGenXPromotePredicatePass(PassRegistry &);
6166
}
6267
INITIALIZE_PASS_BEGIN(GenXPromotePredicate, "GenXPromotePredicate",
6368
"GenXPromotePredicate", false, false)
69+
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
6470
INITIALIZE_PASS_END(GenXPromotePredicate, "GenXPromotePredicate",
6571
"GenXPromotePredicate", false, false)
6672

@@ -138,8 +144,9 @@ static Value *promoteInstToScalar(Instruction *Inst) {
138144

139145
// Promote one predicate instruction to grf - promote all its operands and
140146
// instruction itself, and then sink the result back to predicate.
141-
static Value *promoteInst(Instruction *Inst) {
142-
if (auto *VTy = dyn_cast<IGCLLVM::FixedVectorType>(Inst->getType())) {
147+
static Value *promoteInst(Instruction *Inst, bool AllowScalarPromotion) {
148+
if (auto *VTy = dyn_cast<IGCLLVM::FixedVectorType>(Inst->getType());
149+
VTy && AllowScalarPromotion) {
143150
IGC_ASSERT(VTy->isIntOrIntVectorTy(1));
144151
auto Width = VTy->getNumElements();
145152

@@ -220,7 +227,8 @@ static void foldBitcast(BitCastInst *Cast) {
220227
class PredicateWeb {
221228
public:
222229
template <class InputIt>
223-
PredicateWeb(InputIt first, InputIt last) : Web(first, last) {}
230+
PredicateWeb(InputIt First, InputIt Last, bool AllowScalar)
231+
: Web(First, Last), AllowScalarPromotion(AllowScalar) {}
224232
void print(llvm::raw_ostream &O) const {
225233
for (auto Inst : Web)
226234
O << *Inst << '\n';
@@ -236,7 +244,7 @@ class PredicateWeb {
236244
// Do promotion.
237245
SmallVector<Instruction *, 8> Worklist;
238246
for (auto *Inst : Web) {
239-
auto *PromotedInst = promoteInst(Inst);
247+
auto *PromotedInst = promoteInst(Inst, AllowScalarPromotion);
240248

241249
if (isa<TruncInst>(PromotedInst) || isa<BitCastInst>(PromotedInst))
242250
Worklist.push_back(cast<Instruction>(PromotedInst));
@@ -254,6 +262,7 @@ class PredicateWeb {
254262

255263
private:
256264
SmallPtrSet<Instruction *, 16> Web;
265+
bool AllowScalarPromotion;
257266
};
258267

259268
constexpr const char IdxMDName[] = "pred.index";
@@ -273,6 +282,11 @@ struct Comparator {
273282
};
274283

275284
bool GenXPromotePredicate::runOnFunction(Function &F) {
285+
auto &ST = getAnalysis<TargetPassConfig>()
286+
.getTM<GenXTargetMachine>()
287+
.getGenXSubtarget();
288+
bool AllowScalarPromotion = !ST.hasFusedEU();
289+
276290
// Put every predicate instruction into its own equivalence class.
277291
long Idx = 0;
278292
llvm::EquivalenceClasses<Instruction *, Comparator> PredicateWebs;
@@ -303,7 +317,8 @@ bool GenXPromotePredicate::runOnFunction(Function &F) {
303317
for (auto I = PredicateWebs.begin(), E = PredicateWebs.end(); I != E; ++I) {
304318
if (!I->isLeader())
305319
continue;
306-
PredicateWeb Web(PredicateWebs.member_begin(I), PredicateWebs.member_end());
320+
PredicateWeb Web(PredicateWebs.member_begin(I), PredicateWebs.member_end(),
321+
AllowScalarPromotion);
307322
LLVM_DEBUG(dbgs() << "Predicate web:\n"; Web.dump());
308323
++NumCollectedPredicateWebs;
309324
if (!Web.isBeneficialToPromote())

IGC/VectorCompiler/test/PromotePredicate/ispc-example.ll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99
; RUN: %opt %use_old_pass_manager% -GenXPromotePredicate -march=genx64 -mtriple=spir64-unknown-unknown \
1010
; RUN: -mcpu=Gen9 -logical-ops-threshold=2 -S < %s | FileCheck %s
1111

12+
; RUN: %opt %use_old_pass_manager% -GenXPromotePredicate -march=genx64 -mtriple=spir64-unknown-unknown \
13+
; RUN: -mcpu=XeHPG -logical-ops-threshold=2 -S < %s | FileCheck --check-prefix=FUSED %s
14+
15+
; RUN: %opt %use_old_pass_manager% -GenXPromotePredicate -march=genx64 -mtriple=spir64-unknown-unknown \
16+
; RUN: -mcpu=XeHPC -logical-ops-threshold=2 -S < %s | FileCheck %s
17+
1218
; CHECK-LABEL: f_f
1319
; CHECK-DAG: [[LESSEQUAL_A_LOAD_widened:%.*]] = bitcast <8 x i1> %lessequal_a_load_ to i8
1420
; CHECK-DAG: [[EQUAL_A_LOAD5_widened:%.*]] = bitcast <8 x i1> %equal_a_load5_ to i8
@@ -21,6 +27,17 @@
2127
; CHECK-DAG: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8f32(<8 x i1> [[NEG_RETURNED_LANES]], i32 0, <8 x i64> %new_offsets.i.i34, <8 x float> zeroinitializer)
2228
; CHECK-DAG: icmp eq i8 [[LOGICAL_AND_promoted]], -1
2329

30+
; FUSED-LABEL: f_f
31+
; FUSED-DAG: [[LESSEQUAL_A_LOAD_widened:%.*]] = sext <8 x i1> %lessequal_a_load_ to <8 x i16>
32+
; FUSED-DAG: [[EQUAL_A_LOAD5_widened:%.*]] = sext <8 x i1> %equal_a_load5_ to <8 x i16>
33+
; FUSED-DAG: [[LOGICAL_AND_promoted:%.*]] = and <8 x i16> [[LESSEQUAL_A_LOAD_widened]], [[EQUAL_A_LOAD5_widened]]
34+
; FUSED-DAG: [[LOGICAL_AND:%.*]] = icmp ne <8 x i16> [[LOGICAL_AND_promoted]], zeroinitializer
35+
; FUSED-DAG: call i1 @llvm.genx.any.v8i1(<8 x i1> [[LOGICAL_AND]])
36+
; FUSED-DAG: [[RETURNED_LANES_MEMORY_0_promoted:%.*]] = phi <8 x i16> [ [[LOGICAL_AND_promoted]], %safe_if_run_true.safe_if_after_true_crit_edge ], [ zeroinitializer, %allocas.safe_if_after_true_crit_edge ]
37+
; FUSED-DAG: [[NEG_RETURNED_LANES_promoted:%.*]] = xor <8 x i16> [[RETURNED_LANES_MEMORY_0_promoted]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
38+
; FUSED-DAG: [[NEG_RETURNED_LANES:%.*]] = icmp ne <8 x i16> [[NEG_RETURNED_LANES_promoted]], zeroinitializer
39+
; FUSED-DAG: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8f32(<8 x i1> [[NEG_RETURNED_LANES]], i32 0, <8 x i64> %new_offsets.i.i34, <8 x float> zeroinitializer)
40+
2441
declare void @llvm.genx.svm.scatter.v8i1.v8i64.v8f32(<8 x i1>, i32, <8 x i64>, <8 x float>)
2542
declare i1 @llvm.genx.any.v8i1(<8 x i1>)
2643
declare <8 x float> @llvm.genx.svm.block.ld.unaligned.v8f32.i64(i64)

0 commit comments

Comments
 (0)