Skip to content

Commit 3ea1fbb

Browse files
trbauer authored and igcbot committed
Kernel argument loading changes
Clarified the kernel-argument docs and made spacing changes; no functional changes in ISA output. Improved kernel-argument formatting in ASM.
1 parent 39113a8 commit 3ea1fbb

File tree

5 files changed

+240
-76
lines changed

5 files changed

+240
-76
lines changed

visa/G4_Kernel.cpp

Lines changed: 103 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,78 @@ void G4_Kernel::setKernelParameters() {
983983
}
984984
}
985985

986+
bool G4_Kernel::hasInlineData() const {
987+
const IR_Builder &b = *fg.builder;
988+
return
989+
b.getOption(vISA_useInlineData);
990+
}
991+
992+
std::vector<ArgLayout> G4_Kernel::getArgumentLayout() {
993+
const uint32_t startGRF =
994+
getOptions()->getuInt32Option(vISA_loadThreadPayloadStartReg);
995+
const uint32_t inputsStart = startGRF * getGRFSize();
996+
const uint32_t inputCount = fg.builder->getInputCount();
997+
998+
const int PTIS =
999+
AlignUp(getInt32KernelAttr(Attributes::ATTR_PerThreadInputSize),
1000+
getGRFSize());
1001+
1002+
// Checks if input_info is cross-thread-input
1003+
auto isInCrossThreadData = [&](const input_info_t * input_info) {
1004+
return (uint32_t)input_info->offset >= inputsStart + PTIS;
1005+
};
1006+
1007+
const uint32_t inlineDataSize = fg.builder->getInlineDataSize();
1008+
const bool useInlineData = hasInlineData();
1009+
// Checks if input_info fits in inlineData
1010+
auto isInInlineData = [&](const input_info_t *const input_info) {
1011+
if (!useInlineData) {
1012+
return false;
1013+
}
1014+
uint32_t inputEnd = input_info->offset + input_info->size;
1015+
bool fitsInInlineData = inputEnd <= inputsStart + PTIS + inlineDataSize;
1016+
return isInCrossThreadData(input_info) && fitsInInlineData;
1017+
};
1018+
1019+
const uint32_t startGrfAddr =
1020+
getOptions()->getuInt32Option(vISA_loadThreadPayloadStartReg) *
1021+
getGRFSize();
1022+
1023+
std::vector<ArgLayout> args;
1024+
for (unsigned ix = 0; ix < inputCount; ix++) {
1025+
const input_info_t *input = fg.builder->getInputArg(ix);
1026+
if (input->isPseudoInput()) {
1027+
continue;
1028+
} else if (fg.builder->getFCPatchInfo()->getIsEntryKernel()) {
1029+
const vISA::G4_Declare *dcl = input->dcl;
1030+
if (INPUT_GENERAL == input->getInputClass() && !dcl->isLiveIn()) {
1031+
break;
1032+
}
1033+
}
1034+
int dstGrfAddr = input->offset;
1035+
auto memSrc = ArgLayout::MemSrc::INVALID;
1036+
int memOff = input->offset - startGrfAddr; // subtract off r0
1037+
if (isInInlineData(input)) {
1038+
memSrc = ArgLayout::MemSrc::INLINE;
1039+
memOff %= getGRFSize();
1040+
vISA_ASSERT(memOff < (int)inlineDataSize, "inline reg arg OOB");
1041+
vISA_ASSERT(memOff + (int)input->size <= (int)inlineDataSize,
1042+
"inline reg arg overflows");
1043+
} else if (isInCrossThreadData(input)) {
1044+
memSrc = ArgLayout::MemSrc::CTI;
1045+
memOff -= PTIS + (useInlineData ? inlineDataSize : 0);
1046+
} else {
1047+
memSrc = ArgLayout::MemSrc::PTI;
1048+
}
1049+
args.emplace_back(input->dcl, dstGrfAddr, memSrc, memOff, input->size);
1050+
}
1051+
std::sort(args.begin(), args.end(),
1052+
[&](const ArgLayout &a1,const ArgLayout &a2) {
1053+
return a1.dstGrfAddr < a2.dstGrfAddr;
1054+
});
1055+
return args;
1056+
}
1057+
9861058
// Writes this kernel's flow graph to the given stream by forwarding to
// fg.print().
void G4_Kernel::dump(std::ostream &os) const {
  fg.print(os);
}
9871059

9881060
void G4_Kernel::dumpToFile(const std::string &suffixIn, bool forceG4Dump) {
@@ -1399,16 +1471,17 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
13991471
return ss.str();
14001472
};
14011473

1402-
const unsigned inputCount = fg.builder->getInputCount();
1474+
auto args = getArgumentLayout();
1475+
const unsigned inputCount = (unsigned)args.size();
14031476
std::vector<std::string> argNames;
14041477
size_t maxNameLen = 8;
1405-
for (unsigned id = 0; id < inputCount; id++) {
1406-
const input_info_t *ii = fg.builder->getInputArg(id);
1478+
for (unsigned ix = 0; ix < inputCount; ix++) {
1479+
const ArgLayout &a = args[ix];
14071480
std::stringstream ss;
1408-
if (ii->dcl && ii->dcl->getName()) {
1409-
ss << ii->dcl->getName();
1481+
if (a.decl && a.decl->getName()) {
1482+
ss << a.decl->getName();
14101483
} else {
1411-
ss << "__unnamed" << (id + 1);
1484+
ss << "__unnamed" << (ix + 1);
14121485
}
14131486
argNames.push_back(ss.str());
14141487
maxNameLen = std::max(maxNameLen, argNames.back().size());
@@ -1419,8 +1492,8 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
14191492
const size_t COLW_IDENT = maxNameLen;
14201493
static const size_t COLW_TYPE = 8;
14211494
static const size_t COLW_SIZE = 6;
1422-
static const size_t COLW_AT = 8;
1423-
static const size_t COLW_CLASS = 10;
1495+
static const size_t COLW_AT = 8; // e.g. "r16+0x20"
1496+
static const size_t COLW_FROM = 16; // e.g. "inline+0x20"
14241497

14251498
std::stringstream bordss;
14261499
bordss << "// ";
@@ -1433,7 +1506,7 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
14331506
bordss << '+';
14341507
bordss << std::setfill('-') << std::setw(COLW_AT + 2) << "";
14351508
bordss << '+';
1436-
bordss << std::setfill('-') << std::setw(COLW_CLASS + 2) << "";
1509+
bordss << std::setfill('-') << std::setw(COLW_FROM + 2) << "";
14371510
bordss << '+' << "\n";
14381511
std::string border = bordss.str();
14391512

@@ -1443,23 +1516,23 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
14431516
<< " | " << std::left << std::setw(COLW_TYPE) << "type"
14441517
<< " | " << std::right << std::setw(COLW_SIZE) << "bytes"
14451518
<< " | " << std::left << std::setw(COLW_AT) << "at"
1446-
<< " | " << std::left << std::setw(COLW_CLASS) << "class"
1519+
<< " | " << std::left << std::setw(COLW_FROM) << "from"
14471520
<< " |"
14481521
<< "\n";
14491522
os << border;
14501523

14511524
const unsigned grfSize = getGRFSize();
1452-
for (unsigned id = 0; id < inputCount; id++) {
1453-
const input_info_t *input_info = fg.builder->getInputArg(id);
1525+
for (unsigned ix = 0; ix < inputCount; ix++) {
1526+
const ArgLayout &a = args[ix];
14541527
//
14551528
os << "//";
14561529
//
14571530
// id
1458-
os << " | " << std::left << std::setw(COLW_IDENT) << argNames[id];
1531+
os << " | " << std::left << std::setw(COLW_IDENT) << argNames[ix];
14591532
//
14601533
// type and length
14611534
// e.g. :uq x 16
1462-
const G4_Declare *dcl = input_info->dcl;
1535+
const G4_Declare *dcl = a.decl;
14631536
std::stringstream sstype;
14641537
if (dcl) {
14651538
switch (dcl->getElemType()) {
@@ -1521,35 +1594,30 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
15211594
os << " | " << std::left << std::setw(COLW_TYPE) << sstype.str();
15221595
//
15231596
// size
1524-
os << " | " << std::right << std::setw(COLW_SIZE) << std::dec
1525-
<< input_info->size;
1597+
os << " | " << std::right << std::setw(COLW_SIZE) << fmtHex(a.size);
15261598

15271599
// location
1528-
unsigned reg = input_info->offset / grfSize,
1529-
subRegBytes = input_info->offset % grfSize;
1600+
unsigned reg = a.dstGrfAddr / grfSize,
1601+
subRegBytes = a.dstGrfAddr % grfSize;
15301602
std::stringstream ssloc;
15311603
ssloc << "r" << reg;
15321604
if (subRegBytes != 0)
1533-
ssloc << "+" << subRegBytes;
1605+
ssloc << "+" << fmtHex(subRegBytes);
15341606
os << " | " << std::left << std::setw(COLW_AT) << ssloc.str();
15351607

1536-
// class
1537-
std::string inpcls;
1538-
switch (input_info->getInputClass()) {
1539-
case INPUT_GENERAL:
1540-
inpcls = "general";
1541-
break;
1542-
case INPUT_SAMPLER:
1543-
inpcls = "sampler";
1544-
break;
1545-
case INPUT_SURFACE:
1546-
inpcls = "surface";
1547-
break;
1548-
default:
1549-
inpcls = fmtHex((int)input_info->getInputClass());
1550-
break;
1608+
// from
1609+
std::string from;
1610+
switch (a.memSource) {
1611+
case ArgLayout::MemSrc::CTI: from = "cti"; break;
1612+
case ArgLayout::MemSrc::PTI: from = "pti[tid]"; break;
1613+
case ArgLayout::MemSrc::INLINE: from = "inline"; break;
1614+
default: from = fmtHex(int(a.memSource)) + "?"; break;
15511615
}
1552-
os << " | " << std::left << std::setw(COLW_CLASS) << inpcls;
1616+
std::stringstream ssf;
1617+
ssf << from;
1618+
ssf << "+" << fmtHex(a.memOffset);
1619+
1620+
os << " | " << std::left << std::setw(COLW_FROM) << ssf.str();
15531621
//
15541622
os << " |\n";
15551623
}

visa/G4_Kernel.hpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,37 @@ class StackCallABI {
396396
uint32_t getThreadHeaderGRF() const;
397397
};
398398

399+
// represents an argument placement
400+
struct ArgLayout {
401+
const G4_Declare *decl;
402+
403+
// the byte offset in GRF this argument is loaded to
404+
int dstGrfAddr;
405+
406+
// kernel argument buffer source region
407+
enum class MemSrc {
408+
INVALID,
409+
// cross thread input
410+
CTI,
411+
// per thread input
412+
PTI,
413+
// inline data register
414+
INLINE
415+
};
416+
MemSrc memSource;
417+
418+
// the offset within the memory region this is loaded from
419+
int memOffset;
420+
421+
// the size (in memory and GRF) of the argument in bytes
422+
int size;
423+
424+
ArgLayout(const G4_Declare *dcl, int dstGrfAdr, MemSrc mSrc, int mOff,
425+
int sz)
426+
: decl(dcl), dstGrfAddr(dstGrfAdr), memSource(mSrc), memOffset(mOff),
427+
size(sz) { }
428+
};
429+
399430
class G4_Kernel {
400431
public:
401432
using RelocationTableTy = std::vector<RelocationEntry>;
@@ -557,6 +588,8 @@ class G4_Kernel {
557588
uint32_t getFunctionId() const { return m_function_id; }
558589

559590
Options *getOptions() { return m_options; }
591+
const Options *getOptions() const { return m_options; }
592+
560593
const Attributes *getKernelAttrs() const { return m_kernelAttrs; }
561594
bool getBoolKernelAttr(Attributes::ID aID) const {
562595
return getKernelAttrs()->getBoolKernelAttr(aID);
@@ -722,6 +755,9 @@ class G4_Kernel {
722755
inst->computeRightBound(accDef);
723756
}
724757

758+
bool hasInlineData() const;
759+
std::vector<ArgLayout> getArgumentLayout();
760+
725761
private:
726762
G4_BB *getNextBB(G4_BB *bb) const;
727763
unsigned getBinOffsetOfBB(G4_BB *bb) const;

0 commit comments

Comments
 (0)