@@ -983,6 +983,78 @@ void G4_Kernel::setKernelParameters() {
983
983
}
984
984
}
985
985
986
+ bool G4_Kernel::hasInlineData () const {
987
+ const IR_Builder &b = *fg.builder ;
988
+ return
989
+ b.getOption (vISA_useInlineData);
990
+ }
991
+
992
+ std::vector<ArgLayout> G4_Kernel::getArgumentLayout () {
993
+ const uint32_t startGRF =
994
+ getOptions ()->getuInt32Option (vISA_loadThreadPayloadStartReg);
995
+ const uint32_t inputsStart = startGRF * getGRFSize ();
996
+ const uint32_t inputCount = fg.builder ->getInputCount ();
997
+
998
+ const int PTIS =
999
+ AlignUp (getInt32KernelAttr (Attributes::ATTR_PerThreadInputSize),
1000
+ getGRFSize ());
1001
+
1002
+ // Checks if input_info is cross-thread-input
1003
+ auto isInCrossThreadData = [&](const input_info_t * input_info) {
1004
+ return (uint32_t )input_info->offset >= inputsStart + PTIS;
1005
+ };
1006
+
1007
+ const uint32_t inlineDataSize = fg.builder ->getInlineDataSize ();
1008
+ const bool useInlineData = hasInlineData ();
1009
+ // Checks if input_info fits in inlineData
1010
+ auto isInInlineData = [&](const input_info_t *const input_info) {
1011
+ if (!useInlineData) {
1012
+ return false ;
1013
+ }
1014
+ uint32_t inputEnd = input_info->offset + input_info->size ;
1015
+ bool fitsInInlineData = inputEnd <= inputsStart + PTIS + inlineDataSize;
1016
+ return isInCrossThreadData (input_info) && fitsInInlineData;
1017
+ };
1018
+
1019
+ const uint32_t startGrfAddr =
1020
+ getOptions ()->getuInt32Option (vISA_loadThreadPayloadStartReg) *
1021
+ getGRFSize ();
1022
+
1023
+ std::vector<ArgLayout> args;
1024
+ for (unsigned ix = 0 ; ix < inputCount; ix++) {
1025
+ const input_info_t *input = fg.builder ->getInputArg (ix);
1026
+ if (input->isPseudoInput ()) {
1027
+ continue ;
1028
+ } else if (fg.builder ->getFCPatchInfo ()->getIsEntryKernel ()) {
1029
+ const vISA::G4_Declare *dcl = input->dcl ;
1030
+ if (INPUT_GENERAL == input->getInputClass () && !dcl->isLiveIn ()) {
1031
+ break ;
1032
+ }
1033
+ }
1034
+ int dstGrfAddr = input->offset ;
1035
+ auto memSrc = ArgLayout::MemSrc::INVALID;
1036
+ int memOff = input->offset - startGrfAddr; // subtract off r0
1037
+ if (isInInlineData (input)) {
1038
+ memSrc = ArgLayout::MemSrc::INLINE;
1039
+ memOff %= getGRFSize ();
1040
+ vISA_ASSERT (memOff < (int )inlineDataSize, " inline reg arg OOB" );
1041
+ vISA_ASSERT (memOff + (int )input->size <= (int )inlineDataSize,
1042
+ " inline reg arg overflows" );
1043
+ } else if (isInCrossThreadData (input)) {
1044
+ memSrc = ArgLayout::MemSrc::CTI;
1045
+ memOff -= PTIS + (useInlineData ? inlineDataSize : 0 );
1046
+ } else {
1047
+ memSrc = ArgLayout::MemSrc::PTI;
1048
+ }
1049
+ args.emplace_back (input->dcl , dstGrfAddr, memSrc, memOff, input->size );
1050
+ }
1051
+ std::sort (args.begin (), args.end (),
1052
+ [&](const ArgLayout &a1,const ArgLayout &a2) {
1053
+ return a1.dstGrfAddr < a2.dstGrfAddr ;
1054
+ });
1055
+ return args;
1056
+ }
1057
+
986
1058
// Prints this kernel's flow graph to `os` by forwarding to fg.print().
void G4_Kernel::dump(std::ostream &os) const {
  fg.print(os);
}
987
1059
988
1060
void G4_Kernel::dumpToFile (const std::string &suffixIn, bool forceG4Dump) {
@@ -1399,16 +1471,17 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
1399
1471
return ss.str ();
1400
1472
};
1401
1473
1402
- const unsigned inputCount = fg.builder ->getInputCount ();
1474
+ auto args = getArgumentLayout ();
1475
+ const unsigned inputCount = (unsigned )args.size ();
1403
1476
std::vector<std::string> argNames;
1404
1477
size_t maxNameLen = 8 ;
1405
- for (unsigned id = 0 ; id < inputCount; id ++) {
1406
- const input_info_t *ii = fg. builder -> getInputArg (id) ;
1478
+ for (unsigned ix = 0 ; ix < inputCount; ix ++) {
1479
+ const ArgLayout &a = args[ix] ;
1407
1480
std::stringstream ss;
1408
- if (ii-> dcl && ii-> dcl ->getName ()) {
1409
- ss << ii-> dcl ->getName ();
1481
+ if (a. decl && a. decl ->getName ()) {
1482
+ ss << a. decl ->getName ();
1410
1483
} else {
1411
- ss << " __unnamed" << (id + 1 );
1484
+ ss << " __unnamed" << (ix + 1 );
1412
1485
}
1413
1486
argNames.push_back (ss.str ());
1414
1487
maxNameLen = std::max (maxNameLen, argNames.back ().size ());
@@ -1419,8 +1492,8 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
1419
1492
const size_t COLW_IDENT = maxNameLen;
1420
1493
static const size_t COLW_TYPE = 8 ;
1421
1494
static const size_t COLW_SIZE = 6 ;
1422
- static const size_t COLW_AT = 8 ;
1423
- static const size_t COLW_CLASS = 10 ;
1495
+ static const size_t COLW_AT = 8 ; // e.g. "r16+0x20"
1496
+ static const size_t COLW_FROM = 16 ; // e.g. "inline+0x20"
1424
1497
1425
1498
std::stringstream bordss;
1426
1499
bordss << " // " ;
@@ -1433,7 +1506,7 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
1433
1506
bordss << ' +' ;
1434
1507
bordss << std::setfill (' -' ) << std::setw (COLW_AT + 2 ) << " " ;
1435
1508
bordss << ' +' ;
1436
- bordss << std::setfill (' -' ) << std::setw (COLW_CLASS + 2 ) << " " ;
1509
+ bordss << std::setfill (' -' ) << std::setw (COLW_FROM + 2 ) << " " ;
1437
1510
bordss << ' +' << " \n " ;
1438
1511
std::string border = bordss.str ();
1439
1512
@@ -1443,23 +1516,23 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
1443
1516
<< " | " << std::left << std::setw (COLW_TYPE) << " type"
1444
1517
<< " | " << std::right << std::setw (COLW_SIZE) << " bytes"
1445
1518
<< " | " << std::left << std::setw (COLW_AT) << " at"
1446
- << " | " << std::left << std::setw (COLW_CLASS ) << " class "
1519
+ << " | " << std::left << std::setw (COLW_FROM ) << " from "
1447
1520
<< " |"
1448
1521
<< " \n " ;
1449
1522
os << border;
1450
1523
1451
1524
const unsigned grfSize = getGRFSize ();
1452
- for (unsigned id = 0 ; id < inputCount; id ++) {
1453
- const input_info_t *input_info = fg. builder -> getInputArg (id) ;
1525
+ for (unsigned ix = 0 ; ix < inputCount; ix ++) {
1526
+ const ArgLayout &a = args[ix] ;
1454
1527
//
1455
1528
os << " //" ;
1456
1529
//
1457
1530
// id
1458
- os << " | " << std::left << std::setw (COLW_IDENT) << argNames[id ];
1531
+ os << " | " << std::left << std::setw (COLW_IDENT) << argNames[ix ];
1459
1532
//
1460
1533
// type and length
1461
1534
// e.g. :uq x 16
1462
- const G4_Declare *dcl = input_info-> dcl ;
1535
+ const G4_Declare *dcl = a. decl ;
1463
1536
std::stringstream sstype;
1464
1537
if (dcl) {
1465
1538
switch (dcl->getElemType ()) {
@@ -1521,35 +1594,30 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
1521
1594
os << " | " << std::left << std::setw (COLW_TYPE) << sstype.str ();
1522
1595
//
1523
1596
// size
1524
- os << " | " << std::right << std::setw (COLW_SIZE) << std::dec
1525
- << input_info->size ;
1597
+ os << " | " << std::right << std::setw (COLW_SIZE) << fmtHex (a.size );
1526
1598
1527
1599
// location
1528
- unsigned reg = input_info-> offset / grfSize,
1529
- subRegBytes = input_info-> offset % grfSize;
1600
+ unsigned reg = a. dstGrfAddr / grfSize,
1601
+ subRegBytes = a. dstGrfAddr % grfSize;
1530
1602
std::stringstream ssloc;
1531
1603
ssloc << " r" << reg;
1532
1604
if (subRegBytes != 0 )
1533
- ssloc << " +" << subRegBytes;
1605
+ ssloc << " +" << fmtHex ( subRegBytes) ;
1534
1606
os << " | " << std::left << std::setw (COLW_AT) << ssloc.str ();
1535
1607
1536
- // class
1537
- std::string inpcls;
1538
- switch (input_info->getInputClass ()) {
1539
- case INPUT_GENERAL:
1540
- inpcls = " general" ;
1541
- break ;
1542
- case INPUT_SAMPLER:
1543
- inpcls = " sampler" ;
1544
- break ;
1545
- case INPUT_SURFACE:
1546
- inpcls = " surface" ;
1547
- break ;
1548
- default :
1549
- inpcls = fmtHex ((int )input_info->getInputClass ());
1550
- break ;
1608
+ // from
1609
+ std::string from;
1610
+ switch (a.memSource ) {
1611
+ case ArgLayout::MemSrc::CTI: from = " cti" ; break ;
1612
+ case ArgLayout::MemSrc::PTI: from = " pti[tid]" ; break ;
1613
+ case ArgLayout::MemSrc::INLINE: from = " inline" ; break ;
1614
+ default : from = fmtHex (int (a.memSource )) + " ?" ; break ;
1551
1615
}
1552
- os << " | " << std::left << std::setw (COLW_CLASS) << inpcls;
1616
+ std::stringstream ssf;
1617
+ ssf << from;
1618
+ ssf << " +" << fmtHex (a.memOffset );
1619
+
1620
+ os << " | " << std::left << std::setw (COLW_FROM) << ssf.str ();
1553
1621
//
1554
1622
os << " |\n " ;
1555
1623
}
0 commit comments