Skip to content

Commit 72c631e

Browse files
fangliu2020igcbot
authored and committed
[vISA] Fix bugs in G4_SendDescRaw::getElemsPerAddr() and G4_SendDescRaw::getElemSize()
Fix bugs in G4_SendDescRaw::getElemsPerAddr() and G4_SendDescRaw::getElemSize()
1 parent fec8eb8 commit 72c631e

File tree

5 files changed

+102
-98
lines changed

5 files changed

+102
-98
lines changed

visa/BinaryEncodingIGA.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1567,9 +1567,9 @@ static SendDesc encodeExDescSendUnary(G4_InstSend *sendInst, int &xlen,
15671567
// vISA_ShaderDataBaseStats key a requirement to use
15681568
void BinaryEncodingIGA::printSendDataToFile(const G4_SendDescRaw *descG4,
15691569
const char *filePath) const {
1570-
uint32_t src0Len = descG4->getSrc0LenBytes() / 32;
1571-
uint32_t src1Len = descG4->getSrc1LenBytes() / 32;
1572-
uint32_t dstLen = descG4->getDstLenBytes() / 32;
1570+
uint32_t src0Len = descG4->getSrc0LenRegs();
1571+
uint32_t src1Len = descG4->getSrc1LenRegs();
1572+
uint32_t dstLen = descG4->getDstLenRegs();
15731573
FILE *f = fopen(filePath, "a");
15741574
if (f) {
15751575
uint32_t namePos = fileName.find_last_of('\\', fileName.size());

visa/G4_IR.cpp

Lines changed: 20 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6289,44 +6289,29 @@ void G4_InstSend::computeRightBound(G4_Operand *opnd) {
62896289
associateOpndWithInst(opnd, this);
62906290

62916291
if (opnd && !opnd->isImm() && !opnd->isNullReg()) {
6292-
auto computeSendOperandBound = [this](G4_Operand *opnd, int numReg) {
6293-
if (numReg == 0) {
6294-
return;
6295-
}
6296-
6297-
// Sends read/write in units of GRF. With a narrower simd width,
6298-
// the variable may have size smaller than one GRF, or smaller
6299-
// the response or message length. In this case, limit the right
6300-
// bound up to the variable size.
6301-
unsigned LB = opnd->left_bound;
6302-
unsigned RB =
6303-
std::min(opnd->getTopDcl()->getByteSize(),
6304-
LB + numReg * getBuilder().numEltPerGRF<Type_UB>()) -
6305-
1;
6306-
6307-
unsigned NBytes = RB - LB + 1;
6308-
opnd->setBitVecFromSize(NBytes, getBuilder());
6309-
opnd->setRightBound(RB);
6310-
};
6311-
6312-
if (srcs[0] == opnd || (isSplitSend() && srcs[1] == opnd)) {
6313-
// For send instruction's msg operand rightbound depends
6314-
// on msg descriptor
6315-
uint16_t numReg = (srcs[0] == opnd) ? getMsgDesc()->getSrc0LenRegs()
6316-
: getMsgDesc()->getSrc1LenRegs();
6317-
computeSendOperandBound(opnd, numReg);
6318-
} else if (dst == opnd) {
6319-
// Compute right bound for dst operand
6292+
if (dst == opnd || srcs[0] == opnd || (isSplitSend() && srcs[1] == opnd)) {
6293+
// Compute right bound for dst/src0/src1 operands
63206294
const auto *desc = getMsgDesc();
6321-
uint32_t dstBytes = desc->getDstLenBytes();
6322-
if (dstBytes < getBuilder().getGRFSize()) {
6323-
// e.g. OWord block read x1
6324-
opnd->setBitVecL((1ULL << dstBytes) - 1);
6325-
opnd->setRightBound(opnd->left_bound + dstBytes - 1);
6295+
uint32_t opndBytes = 0;
6296+
if (dst == opnd)
6297+
opndBytes = desc->getDstLenBytes();
6298+
else if (srcs[0] == opnd)
6299+
opndBytes = desc->getSrc0LenBytes();
6300+
else
6301+
opndBytes = desc->getSrc1LenBytes();
63266302

6303+
if (opndBytes < getBuilder().getGRFSize()) {
6304+
// e.g. OWord block read x1
6305+
opnd->setBitVecL((1ULL << opndBytes) - 1);
6306+
opnd->setRightBound(opnd->left_bound + opndBytes - 1);
63276307
} else {
6328-
uint16_t numReg = desc->getDstLenRegs();
6329-
computeSendOperandBound(opnd, numReg);
6308+
// For some sends, the operand's size is in GRF units but not the exact
6309+
// size, e.g. block2d. The variable size may be a smaller value. In
6310+
// this case, limit the right bound up to the variable size.
6311+
auto dclSize = opnd->getTopDcl()->getByteSize();
6312+
opndBytes = std::min(dclSize, opndBytes);
6313+
opnd->setBitVecFromSize(opndBytes, getBuilder());
6314+
opnd->setRightBound(opnd->left_bound + opndBytes - 1);
63306315
}
63316316
} else {
63326317
opnd->computeRightBound(execSize);

visa/G4_SendDescs.cpp

Lines changed: 54 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,7 +1080,8 @@ unsigned G4_SendDescRaw::getElemsPerAddr() const {
10801080
case LSC_STORE_QUAD:
10811081
case LSC_LOAD_QUAD: {
10821082
int elems = 0;
1083-
auto cmask = (getDesc() >> 14) & 0xF;
1083+
// bits [15:12] are the channel mask
1084+
auto cmask = (getDesc() >> 12) & 0xF;
10841085
for (int i = 0; i < 4; i++, cmask >>= 1) {
10851086
elems += (cmask & 1);
10861087
}
@@ -1091,7 +1092,7 @@ unsigned G4_SendDescRaw::getElemsPerAddr() const {
10911092
case LSC_STORE:
10921093
case LSC_STORE_STRIDED:
10931094
// bits [14:12] are the vector size
1094-
switch ((getDesc() >> 14) & 0x7) {
1095+
switch ((getDesc() >> 12) & 0x7) {
10951096
case 0:
10961097
return 1;
10971098
case 1:
@@ -1167,7 +1168,7 @@ unsigned G4_SendDescRaw::getElemSize() const {
11671168
return 0;
11681169
} // else supported
11691170
}
1170-
// []
1171+
// bits [11:9] are data size
11711172
switch ((getDesc() >> 9) & 0x7) {
11721173
case 0:
11731174
return 1; // d8 (block2d only)
@@ -1176,7 +1177,7 @@ unsigned G4_SendDescRaw::getElemSize() const {
11761177
case 3:
11771178
return 8; // d64
11781179
default:
1179-
return 2; // d32, d8u32, ... all 32b in register file
1180+
return 4; // d32, d8u32, ... all 32b in register file
11801181
}
11811182
} else if (getSFID() == SFID::SAMPLER) {
11821183
return is16BitReturn() ? 2 : 4;
@@ -1500,43 +1501,64 @@ size_t G4_SendDescRaw::getSrc0LenBytes() const {
15001501
return MessageLength() * (size_t)irb.getGRFSize();
15011502
}
15021503

1504+
uint32_t G4_SendDescRaw::getDataSizeInBytesLscLdStInst(
1505+
Gen4_Operand_Number opnd_num) const {
1506+
vISA_ASSERT(opnd_num == Opnd_dst || opnd_num == Opnd_src1,
1507+
"expect Opnd_dst or Opnd_src1");
1508+
uint32_t dataBytes = opnd_num == Opnd_dst
1509+
? (ResponseLength() * irb.getGRFSize())
1510+
: (src1Len * irb.getGRFSize());
1511+
if (getLscDataOrder() == LSC_DATA_ORDER_NONTRANSPOSE) {
1512+
// Non-transpose
1513+
// If vecSize > 1, make the data size GRF-aligned for simplicity.
1514+
// Otherwise, the data size is the exact bytes accessed by HW.
1515+
if (getElemsPerAddr() <= 1)
1516+
dataBytes = execSize * getElemSize();
1517+
} else {
1518+
// Transpose
1519+
dataBytes = getElemsPerAddr() * getElemSize();
1520+
}
1521+
return dataBytes;
1522+
}
1523+
15031524
size_t G4_SendDescRaw::getDstLenBytes() const {
1525+
uint32_t dstBytes = ResponseLength() * irb.getGRFSize();
15041526
if (isHWordScratchRW() && ResponseLength() != 0) {
1505-
return 32 * getHWScratchRWSize(); // HWords
1527+
dstBytes = 32 * getHWScratchRWSize(); // HWords
15061528
} else if (isOwordLoad()) {
1507-
return 16 * getOwordsAccessed(); // OWords
1508-
#if 0
1509-
// Due to VMIT-9224, comment this out!
1510-
} else if (isByteScatterRW() && isDataPortRead()) {
1511-
vASSERT(getExecSize() != g4::SIMD_UNDEFINED);
1512-
uint16_t nbytes = getElemsPerAddr();
1513-
// assume 4 at least
1514-
nbytes = (nbytes >= 4 ? nbytes : 4);
1515-
size_t sz = nbytes * getExecSize();
1516-
return sz;
1517-
} else if (isDWScatterRW() && isDataPortRead()) {
1518-
vASSERT(getExecSize() != g4::SIMD_UNDEFINED);
1519-
size_t sz = 4 * getElemsPerAddr() * getExecSize();
1520-
return sz;
1521-
} else if (isQWScatterRW() && isDataPortRead()) {
1522-
vASSERT(getExecSize() != g4::SIMD_UNDEFINED);
1523-
size_t sz = 8 * getElemsPerAddr() * getExecSize();
1524-
return sz;
1525-
} else if (isUntypedRW() && isDataPortRead()) {
1526-
vASSERT(getExecSize() != g4::SIMD_UNDEFINED);
1527-
size_t sz = 4 * getEnabledChannelNum() * getExecSize();
1528-
return sz;
1529-
#endif
1530-
} else {
1531-
// fallback to the raw GRF count
1532-
return ResponseLength() * (size_t)irb.getGRFSize();
1529+
dstBytes = 16 * getOwordsAccessed(); // OWords
1530+
} else if (isLscDescriptor) {
1531+
// LSC messages
1532+
auto op = getLscOp();
1533+
switch (op) {
1534+
case LSC_OP::LSC_LOAD:
1535+
if (ResponseLength() != 0)
1536+
dstBytes = getDataSizeInBytesLscLdStInst(Opnd_dst);
1537+
break;
1538+
// TODO: handle other LSC op codes
1539+
default:
1540+
break;
1541+
}
15331542
}
1543+
return dstBytes;
15341544
}
15351545

15361546
size_t G4_SendDescRaw::getSrc1LenBytes() const {
15371547
if (isLscDescriptor) {
1538-
return src1Len * irb.getGRFSize();
1548+
uint32_t src1LenBytes = src1Len * irb.getGRFSize();
1549+
auto op = getLscOp();
1550+
switch (op) {
1551+
case LSC_OP::LSC_STORE:
1552+
src1LenBytes = getDataSizeInBytesLscLdStInst(Opnd_src1);
1553+
break;
1554+
// TODO: handle other LSC op codes
1555+
default:
1556+
// use the default value
1557+
break;
1558+
}
1559+
return src1LenBytes;
15391560
}
1561+
15401562
if (isHWordScratchRW() && extMessageLength() != 0) {
15411563
return 32 * getHWScratchRWSize(); // HWords
15421564
}

visa/G4_SendDescs.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,10 @@ class G4_SendDescRaw : public G4_SendDesc {
624624

625625
std::string getDescription() const override;
626626

627+
// Return data size of either dst or src1 in bytes for LSC
628+
// load/store instructions
629+
uint32_t getDataSizeInBytesLscLdStInst(Gen4_Operand_Number opnd_num) const;
630+
627631
private:
628632
void setBindingTableIdx(unsigned idx);
629633

visa/G4_Verifier.cpp

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -403,27 +403,23 @@ void G4Verifier::verifyOpnd(G4_Operand *opnd, G4_INST *inst) {
403403

404404
if (opnd == inst->getDst()) {
405405
if (opnd->isRightBoundSet() && !opnd->isNullReg()) {
406-
unsigned int correctRB = ((inst->getMsgDesc()->getDstLenRegs() +
407-
opnd->asDstRegRegion()->getRegOff()) *
408-
kernel.numEltPerGRF<Type_UB>()) -
409-
1;
406+
unsigned int correctRB = opnd->asDstRegRegion()->getRegOff() *
407+
kernel.numEltPerGRF<Type_UB>();
410408
uint32_t dstLenBytes = inst->getMsgDesc()->getDstLenBytes();
411409
if (dstLenBytes < kernel.getGRFSize()) {
412-
correctRB = opnd->getLeftBound() + dstLenBytes - 1;
413-
} else if (opnd->getTopDcl()->getByteSize() <
414-
kernel.numEltPerGRF<Type_UB>()) {
415-
correctRB =
416-
opnd->getLeftBound() + opnd->getTopDcl()->getByteSize() - 1;
410+
correctRB += (dstLenBytes - 1);
411+
} else {
412+
uint32_t correctDstLenBytes =
413+
std::min(opnd->getTopDcl()->getByteSize(), dstLenBytes);
414+
correctRB += (correctDstLenBytes - 1);
417415
}
418416

419417
G4_Declare *parentDcl = opnd->getBase()->asRegVar()->getDeclare();
420-
while (parentDcl != NULL) {
418+
while (parentDcl) {
421419
correctRB += parentDcl->getAliasOffset();
422420
parentDcl = parentDcl->getAliasDeclare();
423421
}
424422

425-
correctRB = std::min(correctRB, opnd->getTopDcl()->getByteSize() - 1);
426-
427423
if (opnd->getRightBound() != correctRB) {
428424
DEBUG_VERBOSE("Right bound mismatch for send inst dst. Orig rb = "
429425
<< opnd->getRightBound()
@@ -434,31 +430,28 @@ void G4Verifier::verifyOpnd(G4_Operand *opnd, G4_INST *inst) {
434430
vISA_ASSERT(false, "Right bound mismatch!");
435431
}
436432
}
437-
} else if (opnd == inst->getSrc(0) || opnd == inst->getSrc(1)) {
433+
} else if (opnd == inst->getSrc(0) ||
434+
(inst->isSplitSend() && opnd == inst->getSrc(1))) {
438435
if (opnd->isRightBoundSet()) {
439-
int msgLength = (opnd == inst->getSrc(0))
440-
? inst->getMsgDesc()->getSrc0LenRegs()
441-
: inst->getMsgDesc()->getSrc1LenRegs();
442-
unsigned int numBytes = opnd->getTopDcl()->getByteSize();
443-
unsigned int correctRB = 0;
444-
if (numBytes < kernel.numEltPerGRF<Type_UB>()) {
445-
correctRB = opnd->asSrcRegRegion()->getRegOff() *
446-
kernel.numEltPerGRF<Type_UB>() +
447-
numBytes - 1;
436+
unsigned int correctRB = opnd->asSrcRegRegion()->getRegOff() *
437+
kernel.numEltPerGRF<Type_UB>();
438+
unsigned int srcBytes = (opnd == inst->getSrc(0))
439+
? inst->getMsgDesc()->getSrc0LenBytes()
440+
: inst->getMsgDesc()->getSrc1LenBytes();
441+
if (srcBytes < kernel.numEltPerGRF<Type_UB>()) {
442+
correctRB += (srcBytes - 1);
448443
} else {
449-
correctRB = ((msgLength + opnd->asSrcRegRegion()->getRegOff()) *
450-
kernel.numEltPerGRF<Type_UB>()) -
451-
1;
444+
uint32_t correctSrcBytes =
445+
std::min(opnd->getTopDcl()->getByteSize(), srcBytes);
446+
correctRB += (correctSrcBytes - 1);
452447
}
453448

454449
G4_Declare *parentDcl = opnd->getBase()->asRegVar()->getDeclare();
455-
while (parentDcl != NULL) {
450+
while (parentDcl) {
456451
correctRB += parentDcl->getAliasOffset();
457452
parentDcl = parentDcl->getAliasDeclare();
458453
}
459454

460-
correctRB = std::min(correctRB, opnd->getTopDcl()->getByteSize() - 1);
461-
462455
if (opnd->getRightBound() != correctRB) {
463456
DEBUG_VERBOSE("Right bound mismatch for send inst src0. Orig rb = "
464457
<< opnd->getRightBound()

0 commit comments

Comments
 (0)