Skip to content

Commit 44d7955

Browse files
fangliu2020 and igcbot
authored and committed
Enable vISA for XeHP platform
1 parent aba71d6 commit 44d7955

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+6632
-101
lines changed

visa/AccSubstitution.cpp

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,8 @@ struct AccInterval
4242
}
4343
int dist = lastUse - inst->getLocalId();
4444

45-
return std::pow((double)inst->use_size(), 3) / dist;
45+
//Bundle conflict has higher priority than bank conflict. Because bundle conflict means bank conflict at the same time.
46+
return (std::pow((double)(bundleConflictTimes + 1), 3) + std::pow((double)(bankConflictTimes + 1), 2) + std::pow((double)inst->use_size(), 3) / dist) / (suppressionTimes + 1);
4647
}
4748

4849
// see if this interval needs both halves of the acc
@@ -53,6 +54,7 @@ struct AccInterval
5354
case Type_F:
5455
return inst->getExecSize() == G4_ExecSize(builder.getNativeExecSize() * 2);
5556
case Type_HF:
57+
case Type_BF:
5658
return false;
5759
case Type_DF:
5860
return inst->getExecSize() > G4_ExecSize(builder.getNativeExecSize() / 2);
@@ -852,6 +854,23 @@ void AccSubPass::multiAccSub(G4_BB* bb)
852854

853855
std::map<G4_INST*, unsigned int> BCInfo;
854856

857+
if (builder.getPlatform() == GENX_XE_HP)
858+
{
859+
int suppressRegs[4];
860+
for (int i = 0; i < 3; i++)
861+
{
862+
suppressRegs[i] = -1;
863+
}
864+
suppressRegs[3] = -1;
865+
866+
//Do bank conflict analysis for the BB
867+
for (auto instIter = bb->begin(), instEnd = bb->end(); instIter != instEnd; ++instIter)
868+
{
869+
G4_INST* inst = *instIter;
870+
bankConflictAnalysisTGL(inst, suppressRegs, &BCInfo);
871+
}
872+
}
873+
855874
//build intervals for potential acc candidates as well as pre-existing acc uses from mac/mach/addc/etc
856875
for (auto instIter = bb->begin(), instEnd = bb->end(); instIter != instEnd; ++instIter)
857876
{

visa/Attributes.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,10 @@ Attributes::ID Attributes::getAttributeID(const char* AttrName)
7676
{ // start with a lower case 'p'
7777
return ATTR_PerThreadInputSize;
7878
}
79+
if (aName == "crossThreadInputSize")
80+
{ // start with a lower case 'c'
81+
return ATTR_CrossThreadInputSize;
82+
}
7983
if (aName == "perThreadInputSize")
8084
{ // start with a lower case 'p'
8185
return ATTR_PerThreadInputSize;

visa/BinaryEncodingIGA.cpp

Lines changed: 146 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -234,6 +234,77 @@ class BinaryEncodingIGA
234234
}
235235
}
236236

237+
// Translates a GenPrecision value into the corresponding iga::Type.
// U8 and S8 map to the IGA byte types (UB and B); FP16 maps to HF; every other
// listed precision maps to the IGA type of the same name.
// A value that is not listed reaches the default case, which hits
// assert(false) and returns iga::Type::INVALID.
238+
iga::Type getIGAPrecisionType(GenPrecision p) const
239+
{
240+
switch (p)
241+
{
242+
case GenPrecision::U1: return iga::Type::U1;
243+
case GenPrecision::U2: return iga::Type::U2;
244+
case GenPrecision::U4: return iga::Type::U4;
245+
case GenPrecision::U8: return iga::Type::UB;
246+
case GenPrecision::S2: return iga::Type::S2;
247+
case GenPrecision::S4: return iga::Type::S4;
248+
case GenPrecision::S8: return iga::Type::B;
249+
case GenPrecision::FP16: return iga::Type::HF;
250+
case GenPrecision::BF16: return iga::Type::BF;
251+
default:
252+
assert(false && "illegal Operand Precision");
253+
return iga::Type::INVALID;
254+
}
255+
}
256+
257+
// Returns the IGA type to use for source operand SrcOprdIx (0, 1 or 2) of a
// dpas instruction.
//   src0: the operand's own type, or the destination's type when src0 is the
//         null register.
//   src1: derived from the instruction's src1 precision.
//   src2: derived from the instruction's src2 precision.
// NOTE(review): for any other index only MUST_BE_TRUE(false, ...) fires; `ty`
// has no initializer, so it is returned unset if that macro does not abort —
// confirm MUST_BE_TRUE's behavior in all build configurations.
iga::Type getIGADpasType(G4_InstDpas* DpasInst, int SrcOprdIx) const
258+
{
259+
iga::Type ty;
260+
switch (SrcOprdIx) {
261+
default:
262+
MUST_BE_TRUE(false, "Invalid SrcOprdIx!");
263+
break;
264+
case 0:
265+
{
266+
G4_Operand* src0 = DpasInst->getSrc(0);
267+
// A null src0 carries no type of its own here, so the destination's type is used.
if (src0->isNullReg()) {
268+
ty = getIGAType(DpasInst->getDst()->getType(), platform);
269+
}
270+
else
271+
{
272+
ty = getIGAType(src0->getType(), platform);
273+
}
274+
break;
275+
}
276+
case 1:
277+
ty = getIGAPrecisionType(DpasInst->getSrc1Precision());
278+
break;
279+
case 2:
280+
ty = getIGAPrecisionType(DpasInst->getSrc2Precision());
281+
break;
282+
}
283+
return ty;
284+
}
285+
286+
// Builds the IGA register reference for source operand SrcOprdIx of a dpas
// instruction. The reference starts as getIGARegRef(src); for src2 only, the
// sub-register number is rescaled from units of the operand's type to units
// of the src2 precision. Other operands are returned unchanged.
iga::RegRef getIGADpasRegRef(G4_InstDpas* DpasInst, int SrcOprdIx) const
287+
{
288+
G4_Operand* src = DpasInst->getSrc(SrcOprdIx);
289+
iga::RegRef regref = getIGARegRef(src);
290+
if (SrcOprdIx == 2) {
291+
// By default, subRegNum is in terms of operand's type (D/UD for
292+
// dpas's src1/2). IGA needs it to be in terms of precision type.
293+
// Note that no need to do it for src1 as it must be grf-aligned!
294+
assert((regref.subRegNum % 2) == 0 &&
295+
"Minimum alignemnt of dpas's src2 must be QW");
296+
// Convert the sub-register offset to bits, then divide by the precision width in bits.
uint32_t bitOffsets = regref.subRegNum * src->getTypeSize() * 8;
297+
uint32_t PBits = G4_InstDpas::GetPrecisionSizeInBits(DpasInst->getSrc2Precision());
298+
regref.subRegNum = bitOffsets / PBits;
299+
}
300+
return regref;
301+
}
302+
303+
// Returns the iga::BfnFC built from the 8-bit boolean function control value
// of `inst`, which is accessed through inst->asBfnInst().
static iga::BfnFC getBfnFC(const G4_INST *inst)
304+
{
305+
uint8_t funcCtrl = inst->asBfnInst()->getBooleanFuncCtrl();
306+
return iga::BfnFC(funcCtrl);
307+
}
237308
static iga::SFID getSFID(const G4_INST* inst);
238309
static iga::MathFC getMathFC(const G4_INST *inst);
239310
iga::Type getIGAType(const G4_INST* I, Gen4_Operand_Number O, TARGET_PLATFORM P);
@@ -264,6 +335,9 @@ Platform BinaryEncodingIGA::getIGAInternalPlatform(TARGET_PLATFORM genxPlatform)
264335
case GENX_TGLLP:
265336
platform = Platform::XE;
266337
break;
338+
case GENX_XE_HP:
339+
platform = Platform::XE_HP;
340+
break;
267341
default:
268342
break;
269343
}
@@ -553,6 +627,21 @@ std::pair<const iga::OpSpec *,iga::Subfunction> BinaryEncodingIGA::getIgaOpInfo(
553627
case G4_dp3: igaOp = iga::Op::DP3; break;
554628
case G4_dp2: igaOp = iga::Op::DP2; break;
555629
case G4_dp4a: igaOp = iga::Op::DP4A; break;
630+
case G4_dpas:
631+
case G4_dpasw:
632+
{
633+
igaOp = inst->opcode() == G4_dpasw ? iga::Op::DPASW : iga::Op::DPAS;
634+
G4_InstDpas* dpasInst = inst->asDpasInst();
635+
uint8_t D = dpasInst->getSystolicDepth();
636+
uint8_t C = dpasInst->getRepeatCount();
637+
sf = iga::GetDpasFC(D, C);
638+
break;
639+
}
640+
case G4_add3: igaOp = iga::Op::ADD3; break;
641+
case G4_bfn:
642+
igaOp = iga::Op::BFN;
643+
sf = getBfnFC(inst);
644+
break;
556645
case G4_line: igaOp = iga::Op::LINE; break;
557646
case G4_pln: igaOp = iga::Op::PLN; break;
558647
case G4_mad: igaOp = iga::Op::MAD; break;
@@ -619,8 +708,35 @@ void BinaryEncodingIGA::SetSWSB(G4_INST *inst, iga::SWSB &sw)
619708
sw.sbid = inst->getToken();
620709
}
621710

711+
// Set distance, e.g. A@1
712+
using DistanceType = vISA::G4_INST::DistanceType;
622713
if ((unsigned)inst->getDistance())
623714
{
715+
// check the distance type for multi-dist-pipes
716+
if (kernel.fg.builder->hasThreeALUPipes() ||
717+
kernel.fg.builder->hasFourALUPipes()) {
718+
switch (inst->getDistanceTypeXe())
719+
{
720+
case DistanceType::DIST:
721+
sw.distType = SWSB::DistType::REG_DIST;
722+
break;
723+
case DistanceType::DISTALL:
724+
sw.distType = SWSB::DistType::REG_DIST_ALL;
725+
break;
726+
case DistanceType::DISTINT:
727+
sw.distType = SWSB::DistType::REG_DIST_INT;
728+
break;
729+
case DistanceType::DISTFLOAT:
730+
sw.distType = SWSB::DistType::REG_DIST_FLOAT;
731+
break;
732+
case DistanceType::DISTLONG:
733+
sw.distType = SWSB::DistType::REG_DIST_LONG;
734+
break;
735+
default:
736+
break;
737+
}
738+
}
739+
else
624740
{
625741
// there is only one pipe on single-dist-pipe platform,
626742
// must be REG_DIST
@@ -765,6 +881,8 @@ void BinaryEncodingIGA::Encode()
765881
SWSB::InstType instTy = SWSB::InstType::UNKNOWN;
766882
if (inst->isMath())
767883
instTy = SWSB::InstType::MATH;
884+
else if (inst->isDpas())
885+
instTy = SWSB::InstType::DPAS;
768886
else if (inst->isSend())
769887
instTy = SWSB::InstType::SEND;
770888
else
@@ -1157,6 +1275,22 @@ void BinaryEncodingIGA::translateInstructionSrcs(
11571275
region,
11581276
type);
11591277
}
1278+
else if (inst->isDpas())
1279+
{
1280+
assert(srcRegion->getRegAccess() == Direct &&
1281+
"dpas does not support indirect GRF operands");
1282+
G4_InstDpas* dpasInst = inst->asDpasInst();
1283+
RegRef regRef = getIGADpasRegRef(dpasInst, i);
1284+
type = getIGADpasType(dpasInst, i);
1285+
1286+
igaInst->setDirectSource(
1287+
opIx,
1288+
srcMod,
1289+
getIGARegName(srcRegion),
1290+
regRef,
1291+
region,
1292+
type);
1293+
}
11601294
else if (srcRegion->getRegAccess() == Direct)
11611295
{
11621296
igaInst->setDirectSource(
@@ -1201,7 +1335,6 @@ void BinaryEncodingIGA::translateInstructionSrcs(
12011335
} // for
12021336
}
12031337

1204-
12051338
SendDesc BinaryEncodingIGA::getIGASendDesc(G4_INST* sendInst) const
12061339
{
12071340
SendDesc desc;
@@ -1319,8 +1452,19 @@ iga::SendDesc BinaryEncodingIGA::encodeExDescRegA0(
13191452
(uint16_t)exDescG4->asSrcRegRegion()->ExSubRegNum(valid);
13201453
assert(valid && "invalid subreg");
13211454

1455+
if (kernel.fg.builder->useNewExtDescFormat() && descG4->isCPSEnabled()) {
1456+
// CPS is an instruction option if using RegDesc+ExBSO
1457+
extraOpts.add(InstOpt::CPS);
1458+
}
1459+
1460+
// By default all RegDesc in the new descriptor format will use
1461+
// the ExBSO model if at all possible
1462+
bool encodeExBso = kernel.fg.builder->useNewExtDescFormat();
1463+
if (encodeExBso)
1464+
extraOpts.add(InstOpt::EXBSO);
13221465

13231466
// G4 IR keeps Src1.Length (xlen) separate. So it's known,
1467+
// (even with a reg desc in nonExBSO mode)
13241468
xlen = (int)descG4->extMessageLength();
13251469

13261470
return exDescIga;
@@ -1463,6 +1607,7 @@ iga::Type BinaryEncodingIGA::getIGAType(G4_Type type, TARGET_PLATFORM genxPlatfo
14631607
case Type_V: return iga::Type::V;
14641608
case Type_VF: return iga::Type::VF;
14651609
case Type_NF: return iga::Type::NF;
1610+
case Type_BF: return iga::Type::BF;
14661611
default:
14671612
assert(false && "illegal type");
14681613
return iga::Type::INVALID;

visa/BuildCISAIR.h

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -819,6 +819,49 @@ class CISA_IR_Builder : public VISABuilder
819819
VISA_BUILDER_OPTION getBuilderOption() const { return mBuildOption; }
820820
vISABuilderMode getBuilderMode() const { return m_builderMode; }
821821

822+
bool CISA_create_dpas_instruction(
823+
ISA_Opcode opcode,
824+
VISA_EMask_Ctrl emask,
825+
unsigned exec_size,
826+
VISA_opnd * dst_cisa,
827+
VISA_opnd * src0_cisa,
828+
VISA_opnd * src1_cisa,
829+
VISA_opnd * src2_cisa,
830+
GenPrecision A,
831+
GenPrecision W,
832+
uint8_t D,
833+
uint8_t C,
834+
int lineNum);
835+
836+
bool CISA_create_bfn_instruction(
837+
VISA_opnd * pred,
838+
uint8_t func_ctrl,
839+
bool sat,
840+
VISA_EMask_Ctrl emask,
841+
unsigned exec_size,
842+
VISA_opnd * dst_cisa,
843+
VISA_opnd * src0_cisa,
844+
VISA_opnd * src1_cisa,
845+
VISA_opnd * src2_cisa,
846+
int lineNum);
847+
848+
bool CISA_create_qword_scatter_instruction(
849+
ISA_Opcode opcode,
850+
VISA_opnd *pred,
851+
VISA_EMask_Ctrl eMask,
852+
unsigned execSize,
853+
unsigned numBlocks,
854+
const char* surfaceName,
855+
VISA_opnd *offsets,
856+
VISA_opnd *dstSrc,
857+
int lineNum);
858+
859+
bool CISA_create_bf_cvt_instruction(
860+
VISA_EMask_Ctrl emask,
861+
unsigned exec_size,
862+
VISA_opnd *dst,
863+
VISA_opnd *src0,
864+
int lineNum);
822865

823866

824867

0 commit comments

Comments
 (0)