Skip to content

Commit ccd903b

Browse files
DianaChensys_zuul
authored andcommitted
vISA: set FFID to sr0 if there is cr0 write in the shader
Change-Id: I965b25ba3434c557d50b7c447bf7aea713c588ed
1 parent bd89090 commit ccd903b

File tree

7 files changed

+185
-7
lines changed

7 files changed

+185
-7
lines changed

visa/BinaryEncodingIGA.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -610,13 +610,14 @@ void BinaryEncodingIGA::DoAll()
610610
return false;
611611
};
612612

613-
// Make the size of the first BB is multiple of 4 instructions, and do not compact
613+
// Make the size of the first BB a multiple of 4 instructions, and do not compact
614614
// any instructions in it, so that the size of the first BB is a multiple of 64 bytes
615-
if (kernel.fg.builder->getHasPerThreadProlog())
615+
if (kernel.fg.builder->getHasPerThreadProlog() ||
616+
kernel.fg.builder->getHasComputeFFIDProlog())
616617
{
617618
G4_BB* first_bb = *kernel.fg.begin();
618619
size_t num_inst = first_bb->getInstList().size();
619-
assert(num_inst != 0 && "ThreadProlog must not be empty");
620+
assert(num_inst != 0 && "the first BB must not be empty");
620621
// label instructions don't count. Only the first instruction could be a label
621622
if (first_bb->getInstList().front()->isLabel())
622623
--num_inst;
@@ -1049,6 +1050,18 @@ void BinaryEncodingIGA::DoAll()
10491050
assert(iter != secondBB->end() && "execpt at least one non-label inst in second BB");
10501051
kernel.fg.builder->getJitInfo()->offsetToSkipPerThreadDataLoad = (uint32_t)(*iter)->getGenOffset();
10511052
}
1053+
if (kernel.fg.builder->getHasComputeFFIDProlog())
1054+
{
1055+
// something weird will happen if both HasPerThreadProlog and HasComputeFFIDProlog are set
1056+
assert(!kernel.fg.builder->getHasPerThreadProlog());
1057+
1058+
// set offsetToSkipSetFFIDGP to the second entry's offset
1059+
// the first instruction in the second BB is the start of the second entry
1060+
assert(kernel.fg.getNumBB() > 1 && "expect at least one prolog BB");
1061+
auto secondBB = *(std::next(kernel.fg.begin()));
1062+
assert(!secondBB->empty() && !secondBB->front()->isLabel());
1063+
kernel.fg.builder->getJitInfo()->offsetToSkipSetFFIDGP = (uint32_t)secondBB->front()->getGenOffset();
1064+
}
10521065
}
10531066

10541067
SWSB_ENCODE_MODE BinaryEncodingIGA::getIGASWSBEncodeMode(const IR_Builder& builder) {

visa/BuildIR.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,8 @@ class IR_Builder {
511511

512512
int perThreadInputSize = 0;
513513
bool hasPerThreadProlog = false;
514+
// Set once a two-entry prolog for setting FFID has been inserted for compute shaders
515+
bool hasComputeFFIDProlog = false;
514516

515517
public:
516518
PreDefinedVars preDefVars;
@@ -616,6 +618,9 @@ class IR_Builder {
616618
bool getHasPerThreadProlog() const { return hasPerThreadProlog; }
617619
void setHasPerThreadProlog() { hasPerThreadProlog = true; }
618620

621+
bool getHasComputeFFIDProlog() const { return hasComputeFFIDProlog; }
622+
void setHasComputeFFIDProlog() { hasComputeFFIDProlog = true; }
623+
619624
bool isOpndAligned( G4_Operand *opnd, unsigned short &offset, int align_byte );
620625

621626
// check if opnd is or can be made "alignByte"-byte aligned. This function will change the underlying
@@ -1099,7 +1104,7 @@ class IR_Builder {
10991104
return inst;
11001105
}
11011106

1102-
G4_INST* createSpill(G4_DstRegRegion* dst, G4_SrcRegRegion* header, G4_SrcRegRegion* payload, unsigned int execSize,
1107+
G4_INST* createSpill(G4_DstRegRegion* dst, G4_SrcRegRegion* header, G4_SrcRegRegion* payload, unsigned int execSize,
11031108
uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option, unsigned int lineno = 0, int CISAoff = -1,
11041109
const char* srcFilename = nullptr)
11051110
{
@@ -1119,7 +1124,7 @@ class IR_Builder {
11191124
auto builtInR0 = getBuiltinR0();
11201125
auto rd = getRegionStride1();
11211126
auto srcRgnr0 = createSrcRegRegion(Mod_src_undef, Direct, builtInR0->getRegVar(), 0, 0, rd, Type_UD);
1122-
G4_INST* spill = createIntrinsicInst(nullptr, Intrinsic::Spill, execSize, dst,
1127+
G4_INST* spill = createIntrinsicInst(nullptr, Intrinsic::Spill, execSize, dst,
11231128
srcRgnr0, payload, nullptr, option, lineno);
11241129
spill->asSpillIntrinsic()->setSrcFilename(srcFilename);
11251130
spill->asSpillIntrinsic()->setCISAOff(CISAoff);
@@ -1129,7 +1134,7 @@ class IR_Builder {
11291134
return spill;
11301135
}
11311136

1132-
G4_INST* createFill(G4_SrcRegRegion* header, G4_DstRegRegion* dstData, unsigned int execSize, uint16_t numRows, uint32_t offset,
1137+
G4_INST* createFill(G4_SrcRegRegion* header, G4_DstRegRegion* dstData, unsigned int execSize, uint16_t numRows, uint32_t offset,
11331138
G4_Declare* fp, G4_InstOption option, unsigned int lineno = 0, int CISAoff = -1, const char* srcFilename = nullptr)
11341139
{
11351140
G4_INST* fill = createIntrinsicInst(nullptr, Intrinsic::Fill, execSize, dstData,

visa/Optimizer.cpp

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,7 @@ void Optimizer::initOptimizations()
596596
INITIALIZE_PASS(dce, vISA_EnableDCE, TIMER_OPTIMIZER);
597597
INITIALIZE_PASS(reassociateConst, vISA_reassociate, TIMER_OPTIMIZER);
598598
INITIALIZE_PASS(split4GRFVars, vISA_split4GRFVar, TIMER_OPTIMIZER);
599+
INITIALIZE_PASS(addFFIDProlog, vISA_addFFIDProlog, TIMER_MISC_OPTS);
599600
INITIALIZE_PASS(loadThreadPayload, vISA_loadThreadPayload, TIMER_MISC_OPTS);
600601
INITIALIZE_PASS(insertFenceBeforeEOT, vISA_EnableAlways, TIMER_MISC_OPTS);
601602
INITIALIZE_PASS(insertScratchReadBeforeEOT, vISA_clearScratchWritesBeforeEOT, TIMER_MISC_OPTS);
@@ -1191,6 +1192,8 @@ int Optimizer::optimization()
11911192

11921193
runPass(PI_loadThreadPayload);
11931194

1195+
runPass(PI_addFFIDProlog);
1196+
11941197
// Insert a dummy compact instruction if requested for SKL+
11951198
runPass(PI_insertDummyCompactInst);
11961199

@@ -8067,6 +8070,137 @@ void genBucket(G4_INST *send, Bucket *bucket, RW rwType) {
80678070
}
80688071
}
80698072

8073+
// Create a prolog that writes the FFID into sr0. TGL WA.
8074+
// Done only when there is a cr0 write inside the kernel.
// The pass does nothing unless all of the following hold:
//   - the builder is compiling a kernel (builder.getIsKernel()),
//   - the vISA_setFFID option holds something other than FFID_INVALID,
//   - at least one instruction has a destination whose base is a CR register.
// The FFID is placed in bits 24..27 of sr0.0: the old bits are cleared with the
// mask 0xF0FFFFFF and the new value is OR-ed in as (FFID << 24).
// For FFID_GP / FFID_GP1 two prolog BBs (one per entry) are added and
// builder.setHasComputeFFIDProlog() is called; for any other FFID a single
// prolog BB is added.
8075+
void Optimizer::addFFIDProlog()
8076+
{
8077+
if (!builder.getIsKernel())
8078+
return;
8079+
8080+
FFID ffid = static_cast<FFID>(builder.getOptions()->getuInt32Option(vISA_setFFID));
8081+
// return if FFID is not given (the option still holds FFID_INVALID)
8082+
if (ffid == FFID_INVALID)
8083+
return;
8084+
8085+
// return if there is no cr0 write
// NOTE(review): the test below is isCrReg() on the dst base, so presumably any
// CR-register destination counts, not only cr0 -- confirm.
8086+
bool has_cr0_dst = false;
8087+
for (auto bb : kernel.fg)
8088+
{
8089+
for (G4_INST* inst : *bb)
8090+
{
8091+
if (inst->getDst() != nullptr &&
8092+
inst->getDst()->asDstRegRegion()->getBase()->isCrReg())
8093+
{
8094+
has_cr0_dst = true;
8095+
break;
8096+
}
8097+
}
8098+
if (has_cr0_dst)
8099+
break;
8100+
}
8101+
if (!has_cr0_dst)
8102+
return;
8103+
8104+
// get the decl used as a temporary: register (getNumRegTotal() - 1), i.e. r127.0
// when the kernel has 128 registers in total
// presumably the arguments are (numElems, type, regNum, regOff) -- TODO confirm
8105+
G4_Declare* rtail =
8106+
builder.createHardwiredDeclare(8, Type_UD, kernel.getNumRegTotal() - 1, 0);
8107+
8108+
// (W) and (1|M0) r127.0 <1>:ud sr0.0 <0;1,0>:ud 0xF0FFFFFF:ud
// copies sr0.0 into the temporary with bits 24..27 cleared
8109+
auto createAnd = [this, &rtail]()
8110+
{
8111+
auto src0 = builder.createSrcRegRegion(
8112+
Mod_src_undef, Direct, builder.phyregpool.getSr0Reg(), 0, 0,
8113+
builder.getRegionScalar(), Type_UD);
8114+
auto src1 = builder.createImm(0xF0FFFFFF, Type_UD);
8115+
auto dst = builder.createDstRegRegion(Direct, rtail->getRegVar(), 0, 0, 1, Type_UD);
8116+
8117+
return builder.createInternalInst(nullptr, G4_and, nullptr, false, 1,
8118+
dst, src0, src1, InstOpt_WriteEnable);
8119+
};
8120+
8121+
// (W) or (1|M0) sr0.0<1>:ud r127.0<0;1,0>:ud imm:ud
// writes the temporary, with imm OR-ed in, back to sr0.0
8122+
auto createOr = [this, &rtail](uint32_t imm)
8123+
{
8124+
auto src0 = builder.createSrcRegRegion(
8125+
Mod_src_undef, Direct, rtail->getRegVar(), 0, 0,
8126+
builder.getRegionScalar(), Type_UD);
8127+
auto src1 = builder.createImm(imm, Type_UD);
8128+
auto dst = builder.createDstRegRegion(Direct,
8129+
builder.phyregpool.getSr0Reg(), 0, 0, 1, Type_UD);
8130+
8131+
return builder.createInternalInst(nullptr, G4_or, nullptr, false, 1,
8132+
dst, src0, src1, InstOpt_WriteEnable);
8133+
};
8134+
8135+
// (W) jmpi (1|M0) label
8136+
auto createJmpi = [this](G4_Label* label)
8137+
{
8138+
return builder.createInternalInst(nullptr, G4_jmpi, nullptr, false, 1,
8139+
nullptr, label, nullptr, InstOpt_WriteEnable);
8140+
};
8141+
8142+
// label: (label instruction created through the flow graph)
auto createLabelInst = [this](G4_Label* label)
8143+
{
8144+
return kernel.fg.createNewLabelInst(label);
8145+
};
8146+
8147+
// for compute shader, create two entries
8148+
if (ffid == FFID_GP || ffid == FFID_GP1)
8149+
{
8150+
// Entry0: Set sr0 to FFID_GP (0x7)
8151+
// (W) and (1|M0) r127.0 <1>:ud sr0.0 <0;1,0>:ud 0xF0FFFFFF:ud
8152+
// (W) or (1|M0) sr0.0<1>:ud r127.0<0;1,0>:ud 0x07000000:ud
8153+
// jmpi ffid_prolog_end
8154+
// Entry1: Set sr0 to FFID_GP1 (0x8)
8155+
// (W) and (1|M0) r127.0 <1>:ud sr0.0 <0;1,0>:ud 0xF0FFFFFF:ud
8156+
// (W) or (1|M0) sr0.0<1>:ud r127.0<0;1,0>:ud 0x08000000:ud
8157+
// ffid_prolog_end:
8158+
8159+
// Put the entry0 block into a new BB, so that we can make it 64-byte
8160+
// aligned in BinaryEncodingIGA
8161+
G4_BB* entry_0_bb = kernel.fg.createNewBB();
8162+
entry_0_bb->push_back(createAnd());
8163+
entry_0_bb->push_back(createOr(0x07000000));
8164+
8165+
// get jmp target label. If the next bb has no label, create one and insert it
8166+
// at the beginning
// NOTE(review): only the flow graph is asserted non-empty below; next_bb->front()
// is called without checking that the BB itself has instructions -- confirm the
// first BB can never be empty at this point.
8167+
G4_Label* jmp_label = nullptr;
8168+
assert(kernel.fg.begin() != kernel.fg.end());
8169+
G4_BB* next_bb = *kernel.fg.begin();
8170+
if (next_bb->front()->isLabel())
8171+
{
8172+
jmp_label = next_bb->front()->getSrc(0)->asLabel();
8173+
}
8174+
else
8175+
{
8176+
std::string label_name("ffid_prolog_end");
8177+
jmp_label = builder.createLabel(label_name, LABEL_BLOCK);
8178+
next_bb->insert(next_bb->begin(), createLabelInst(jmp_label));
8179+
}
8180+
entry_0_bb->push_back(createJmpi(jmp_label));
8181+
8182+
// Put the rest in another BB
8183+
G4_BB* entry_1_bb = kernel.fg.createNewBB();
8184+
entry_1_bb->push_back(createAnd());
8185+
entry_1_bb->push_back(createOr(0x08000000));
8186+
8187+
// add these two BB to be the first two in the shader
// presumably addPrologBB inserts at the front, so adding entry_1 first leaves
// entry_0 as the very first BB -- confirm against addPrologBB
8188+
kernel.fg.addPrologBB(entry_1_bb);
8189+
kernel.fg.addPrologBB(entry_0_bb);
8190+
builder.setHasComputeFFIDProlog();
8191+
}
8192+
else
8193+
{
8194+
// for other shaders, set the FFID
8195+
// (W) and (1|M0) r127.0 <1>:ud sr0.0 <0;1,0>:ud 0xF0FFFFFF:ud
8196+
// (W) or (1|M0) sr0.0<1>:ud r127.0<0;1,0>:ud (FFID << 24):ud
8197+
G4_BB* bb = kernel.fg.createNewBB();
8198+
bb->push_back(createAnd());
8199+
bb->push_back(createOr(ffid << 24));
8200+
kernel.fg.addPrologBB(bb);
8201+
}
8202+
}
8203+
80708204
void Optimizer::loadThreadPayload()
80718205
{
80728206
}

visa/Optimizer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ class Optimizer
244244
void clearARFDependencies();
245245
void clearSendDependencies();
246246
void loadThreadPayload();
247+
void addFFIDProlog();
247248
void insertFenceBeforeEOT();
248249
void insertScratchReadBeforeEOT();
249250
void resetA0();
@@ -337,6 +338,7 @@ class Optimizer
337338
PI_reassociateConst,
338339
PI_split4GRFVars,
339340
PI_loadThreadPayload,
341+
PI_addFFIDProlog,
340342
PI_insertFenceBeforeEOT,
341343
PI_insertScratchReadBeforeEOT,
342344
PI_mapOrphans,

visa/include/JitterDataStruct.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,12 @@ typedef struct _CM_JIT_INFO {
7575
unsigned int freeGRFInfoSize;
7676
unsigned char numBytesScratchGtpin;
7777

78-
uint32_t offsetToSkipPerThreadDataLoad = 0;
78+
uint32_t offsetToSkipPerThreadDataLoad = 0;
79+
80+
// When a two-entry prolog is added for setting FFID
81+
// for compute (GP or GP1), skip this offset to set FFID_GP1.
82+
// FFID_GP will be set if this offset is not skipped
83+
uint32_t offsetToSkipSetFFIDGP = 0;
7984
} FINALIZER_INFO;
8085

8186
#endif // _CM_JITTERDATASTRUCT_

visa/include/VISAOptions.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@ DEF_VISA_OPTION(vISA_GetFreeGRFInfo, ET_BOOL, "-getfreegrfinfo", UNUSED
208208
DEF_VISA_OPTION(vISA_clearScratchWritesBeforeEOT, ET_BOOL, NULLSTR, UNUSED, false)
209209
DEF_VISA_OPTION(vISA_clearHDCWritesBeforeEOT, ET_BOOL, NULLSTR, UNUSED, false)
210210
DEF_VISA_OPTION(vISA_setA0toTdrForSendc, ET_BOOL, "-setA0toTdrForSendc", UNUSED, false)
211+
DEF_VISA_OPTION(vISA_addFFIDProlog, ET_BOOL, "-noFFIDProlog", UNUSED, true)
212+
DEF_VISA_OPTION(vISA_setFFID, ET_INT32, "-setFFID", "USAGE: -setFFID <ffid>\n", FFID_INVALID)
211213

212214
//=== HW debugging options ===
213215
DEF_VISA_OPTION(vISA_GenerateDebugInfo, ET_BOOL, "-generateDebugInfo", UNUSED, false)

visa/include/visa_igc_common_header.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,4 +625,21 @@ typedef struct _vISA_RT_CONTROLS
625625
} vISA_RT_CONTROLS;
626626

627627

628+
// FixedFunctionID: these are hardware FFID values
// Optimizer::addFFIDProlog reads one of these from the vISA_setFFID option and
// writes it into sr0.0 as (FFID << 24).
629+
enum FFID
630+
{
631+
FFID_NULL = 0x0,
632+
FFID_VSR = 0x3,
633+
FFID_HS = 0x4,
634+
FFID_DS = 0x5,
635+
FFID_TS = 0x6,
636+
FFID_GP = 0x7, // compute; first entry of the two-entry FFID prolog
637+
FFID_GP1 = 0x8, // compute; second entry of the two-entry FFID prolog
638+
FFID_VS = 0x9,
639+
FFID_GS = 0xC,
640+
FFID_PS = 0xF,
641+
642+
FFID_INVALID = 0xFF // default of the vISA_setFFID option: no FFID was given
643+
};
644+
628645
#endif

0 commit comments

Comments
 (0)