Skip to content

Commit e51af7b

Browse files
trbauer authored and igcbot committed
Coordinate immediate offset
Support TGM coordinate immediate offsets. vISA emulates the small addition to the coordinates during payload creation.
1 parent 9411387 commit e51af7b

File tree

6 files changed: 113 additions and 80 deletions.

visa/BuildIR.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2044,8 +2044,10 @@ class IR_Builder {
20442044
LSC_ADDR_SIZE addrSize, LSC_DATA_SHAPE shape,
20452045
G4_Operand *surface, // surface/bti
20462046
G4_DstRegRegion *dstData, // dst on load/atomic
2047-
G4_SrcRegRegion *src0AddrUs, G4_SrcRegRegion *src0AddrVs,
2048-
G4_SrcRegRegion *src0AddrRs, G4_SrcRegRegion *src0AddrLODs,
2047+
G4_SrcRegRegion *src0AddrUs, int uOff,
2048+
G4_SrcRegRegion *src0AddrVs, int vOff,
2049+
G4_SrcRegRegion *src0AddrRs, int rOff,
2050+
G4_SrcRegRegion *src0AddrLODs,
20492051
G4_SrcRegRegion *src1Data, // store data/extra atomic operands
20502052
G4_SrcRegRegion *src2Data // icas/fcas only
20512053
);

visa/IsaDescription.cpp

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3215,43 +3215,39 @@ static const ISA_SubInst_Desc SVMSubOpcodeDesc[] = {
32153215
}
32163216

32173217
// All LSC typed ops encode the same
3218-
#define LSC_TYPED_OP(ISA_OP, MNEMONIC) \
3219-
{ \
3220-
(ISA_OP), ISA_Inst_LSC, (MNEMONIC), 22, { \
3221-
/* execution control */ \
3222-
{OPND_EXECSIZE, ISA_TYPE_UB, 0}, /* execution size */ \
3223-
{OPND_PRED, ISA_TYPE_UW, 0}, /* predicate */ /* caching opts */ \
3224-
{OPND_OTHER, ISA_TYPE_UB, 0}, /* LSC_CACHE_OPTS::l1 */ \
3225-
{OPND_OTHER, ISA_TYPE_UB, \
3226-
0}, /* LSC_CACHE_OPTS::l3 */ /* addr stuff */ \
3227-
{OPND_OTHER, ISA_TYPE_UB, \
3228-
0}, /* LSC_ADDR::type */ /* confirmed with arch that immoff doesn't \
3229-
exist for typed */ \
3230-
{OPND_OTHER, ISA_TYPE_UB, \
3231-
0}, /* LSC_ADDR::size */ /* data shape stuff */ /* we keep \
3232-
LSC_DATA_SHAPE::order \
3233-
due to \
3234-
lsc_load_status.tgm \
3235-
*/ \
3236-
{OPND_OTHER, ISA_TYPE_UB, 0}, /* LSC_DATA_SHAPE:size */ \
3237-
{OPND_OTHER, ISA_TYPE_UB, 0}, /* LSC_DATA_SHAPE::order */ \
3238-
{OPND_OTHER, ISA_TYPE_UB, 0}, /* LSC_DATA_SHAPE::elems */ \
3239-
{OPND_OTHER, ISA_TYPE_UB, \
3240-
0}, /* LSC_DATA_SHAPE::cmask */ /* operands */ \
3241-
{OPND_SRC_GEN | OPND_IMM | OPND_SRC_ADDR, ISA_TYPE_UB, \
3242-
0}, /* surface reg or imm */ \
3243-
{OPND_OTHER, ISA_TYPE_UD, 0}, /* Reserved */ \
3244-
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* dst */ \
3245-
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* src0 addr Us */ \
3246-
{OPND_OTHER, ISA_TYPE_D, 0}, /* Reserved */ \
3247-
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* src0 addr Vs */ \
3248-
{OPND_OTHER, ISA_TYPE_D, 0}, /* Reserved */ \
3249-
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* src0 addr Rs */ \
3250-
{OPND_OTHER, ISA_TYPE_D, 0}, /* Reserved */ \
3251-
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* src0 addr LODs */ \
3252-
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* src1 data */ \
3253-
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* src2 data */ \
3254-
} \
3218+
#define LSC_TYPED_OP(ISA_OP, MNEMONIC) \
3219+
{ \
3220+
(ISA_OP), ISA_Inst_LSC, (MNEMONIC), 22, { \
3221+
/* execution control */ \
3222+
{OPND_EXECSIZE, ISA_TYPE_UB, 0}, /* execution size */ \
3223+
{OPND_PRED, ISA_TYPE_UW, 0}, /* predicate */ \
3224+
/* caching opts */ \
3225+
{OPND_OTHER, ISA_TYPE_UB, 0}, /* LSC_CACHE_OPTS::l1 */ \
3226+
{OPND_OTHER, ISA_TYPE_UB, 0}, /* LSC_CACHE_OPTS::l3 */ \
3227+
/* addr stuff */ \
3228+
{OPND_OTHER, ISA_TYPE_UB, 0}, /* LSC_ADDR::type */ \
3229+
{OPND_OTHER, ISA_TYPE_UB, 0}, /* LSC_ADDR::size */ \
3230+
/* data shape stuff */ \
3231+
/* we keep LSC_DATA_SHAPE::order due to lsc_load_status.tgm */ \
3232+
{OPND_OTHER, ISA_TYPE_UB, 0}, /* LSC_DATA_SHAPE:size */ \
3233+
{OPND_OTHER, ISA_TYPE_UB, 0}, /* LSC_DATA_SHAPE::order */ \
3234+
{OPND_OTHER, ISA_TYPE_UB, 0}, /* LSC_DATA_SHAPE::elems */ \
3235+
{OPND_OTHER, ISA_TYPE_UB, 0}, /* LSC_DATA_SHAPE::cmask */ \
3236+
/* operands */ \
3237+
{OPND_SRC_GEN | OPND_IMM | OPND_SRC_ADDR, ISA_TYPE_UB, \
3238+
0}, /* surface (reg or imm) */ \
3239+
{OPND_OTHER, ISA_TYPE_UD, 0}, /* surface imm offset */ \
3240+
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* dst */ \
3241+
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* src0 addr Us */ \
3242+
{OPND_OTHER, ISA_TYPE_D, 0}, /* src0 addr Us immoff */ \
3243+
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* src0 addr Vs */ \
3244+
{OPND_OTHER, ISA_TYPE_D, 0}, /* src0 addr Vs immoff */ \
3245+
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* src0 addr Rs */ \
3246+
{OPND_OTHER, ISA_TYPE_D, 0}, /* src0 addr Rs immoff */ \
3247+
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* src0 addr LODs */ \
3248+
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* src1 data */ \
3249+
{OPND_RAW, ISA_TYPE_UB, GRF_ALIGNED}, /* src2 data */ \
3250+
} \
32553251
}
32563252
#define LSC_OP_INVALID \
32573253
{}

visa/IsaDisassembly.cpp

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3060,44 +3060,50 @@ class LscInstFormatter {
30603060
auto addrType = getNextEnumU8<LSC_ADDR_TYPE>();
30613061
auto addrSize = getNextEnumU8<LSC_ADDR_SIZE>();
30623062
auto dataShape = getNextDataShape();
3063+
// parameter order (cf IsaDescription.cpp)
3064+
// ... currOpIx here:
3065+
// 0 - surface
3066+
// 1 - surfaceIndex
3067+
// 2 - dst (data read)
3068+
// 3 - src0 U's (addr)
3069+
// 4 - u offset
3070+
// 5 - src0 V's (addr)
3071+
// 6 - v offset
3072+
// 7 - src0 R's (addr)
3073+
// 8 - r offset
3074+
// 9 - src0 LOD's (addr)
3075+
// 10 - src1 (data sent)
3076+
// 11 - src2 (extra data sent for atomic)
30633077

30643078
auto fmtAddrOperand = [&]() {
3065-
// 0 dst, 1-4 u/v/r/lod, 5 src1, 6 src2
30663079
formatAddrType(addrType, currOpIx);
30673080
ss << "[";
3068-
{
3069-
for (int i = 0; i < 4; i++) {
3070-
// +2 skip surface and dst
3071-
const raw_opnd &ro = getRawOperand(inst, currOpIx + 2 + i);
3072-
auto reg =
3073-
printVariableDeclName(header, ro.index, opts, NOT_A_STATE_OPND);
3074-
// TODO: for null operands, printVariableDeclName with
3075-
// NOT_A_STATE_OPND will return %null and not V0
3076-
// Should this be changed to return V0 for null operands?
3077-
if (reg == "%null")
3078-
break;
3079-
if (i > 0)
3080-
ss << ",";
3081-
formatRawOperand(currOpIx + 2 + i);
3081+
for (int i = 0; i < 4; i++) {
3082+
unsigned int ix = currOpIx + 3 + (i * 2);
3083+
const raw_opnd &ro = getRawOperand(inst, ix);
3084+
auto reg =
3085+
printVariableDeclName(header, ro.index, opts, NOT_A_STATE_OPND);
3086+
if (reg == "%null")
3087+
break; // assume no coords after %null (no imm offs either)
3088+
if (i > 0)
3089+
ss << ",";
3090+
formatRawOperand(ix);
3091+
if (i < 3) {
3092+
int32_t offset = getPrimitive<int32_t>(ix + 1);
3093+
if (offset > 0)
3094+
ss << "+" << offset;
3095+
else if (offset < 0)
3096+
ss << "-" << -offset;
30823097
}
30833098
}
30843099
ss << "]";
30853100
formatAddrSize(addrSize);
30863101
};
30873102

30883103
ss << " ";
3089-
int dstIx = currOpIx + 1;
3090-
int src1Ix = currOpIx + 6;
3091-
int src2Ix = currOpIx + 7;
3092-
// parameter order (cf IsaDescription.cpp)
3093-
// 0 - surface
3094-
// 1 - dst (data read)
3095-
// 2 - src0 U's (addr)
3096-
// 3 - src0 V's (addr)
3097-
// 4 - src0 R's (addr)
3098-
// 5 - src0 LOD's (addr)
3099-
// 6 - src1 (data sent)
3100-
// 7 - src2 (extra data sent for atomic)
3104+
const int dstIx = currOpIx + 2;
3105+
const int src1Ix = currOpIx + 10;
3106+
const int src2Ix = currOpIx + 11;
31013107
if (opInfo.isLoad()) {
31023108
formatDataOperand(dataShape, dstIx);
31033109
ss << " ";
@@ -3137,8 +3143,9 @@ class LscInstFormatter {
31373143
(void)getNextDataShape();
31383144

31393145
int addrSurfIx = currOpIx + 0;
3140-
int dstIx = currOpIx + 1;
3141-
int src0Ix = currOpIx + 2;
3146+
// see formatTyped() for the parameter order
3147+
int dstIx = currOpIx + 2;
3148+
int src0Ix = currOpIx + 3;
31423149

31433150
ss << " ";
31443151
formatRawOperand(dstIx); // dst

visa/IsaVerification.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4140,13 +4140,14 @@ struct LscInstVerifier {
41404140
verifyDataShape(dataShape);
41414141

41424142
verifyAddressType(addrType, currOpIx);
4143-
unsigned int uIx = currOpIx + 2;
4144-
unsigned int vIx = currOpIx + 3;
4145-
unsigned int rIx = currOpIx + 4;
4146-
unsigned int lodIx = currOpIx + 5;
4147-
unsigned int dstOpIx = currOpIx + 1;
4148-
unsigned int src1OpIx = currOpIx + 6;
4149-
// check all the Src0Addr fields (U, V, R, LOD)
4143+
const unsigned uIx = currOpIx + 3;
4144+
const unsigned vIx = currOpIx + 5;
4145+
const unsigned rIx = currOpIx + 7;
4146+
const unsigned lodIx = currOpIx + 9;
4147+
const unsigned dstOpIx = currOpIx + 2;
4148+
const unsigned src1OpIx = currOpIx + 10;
4149+
4150+
// check all the Src0Addr fields (U, V, R, LOD)
41504151
if (opInfo.op == LSC_READ_STATE_INFO) {
41514152
verifyRawOperandNonNull("Src0Addr_UVRL", uIx); // SIMD1 (U, V, R, LOD)
41524153
verifyRawOperandNull("Src0Addr_Vs", vIx); // V's

visa/VISAKernelImpl.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7980,9 +7980,10 @@ VISA_BUILDER_API int VISAKernelImpl::AppendVISALscTypedInst(
79807980
status = m_builder->translateLscTypedInst(
79817981
subOpcode, pred ? pred->g4opnd->asPredicate() : nullptr, execSize,
79827982
emask, cacheOpts, addrType, addrSize, dataShape, surface->g4opnd,
7983-
dstData->g4opnd->asDstRegRegion(), coord0s->g4opnd->asSrcRegRegion(),
7984-
coord1s->g4opnd->asSrcRegRegion(),
7985-
coord2s->g4opnd->asSrcRegRegion(),
7983+
dstData->g4opnd->asDstRegRegion(),
7984+
coord0s->g4opnd->asSrcRegRegion(), coord0Offset,
7985+
coord1s->g4opnd->asSrcRegRegion(), coord1Offset,
7986+
coord2s->g4opnd->asSrcRegRegion(), coord2Offset,
79867987
features->g4opnd->asSrcRegRegion(),
79877988
src1Data->g4opnd->asSrcRegRegion(), src2Data->g4opnd->asSrcRegRegion());
79887989
}
@@ -8006,10 +8007,14 @@ VISA_BUILDER_API int VISAKernelImpl::AppendVISALscTypedInst(
80068007
ADD_OPND(numOpnds, opnds, CreateOtherOpnd(dataShape.chmask, ISA_TYPE_UB));
80078008
//
80088009
ADD_OPND(numOpnds, opnds, surface);
8010+
ADD_OPND(numOpnds, opnds, CreateOtherOpnd(surfaceIndex, ISA_TYPE_D));
80098011
ADD_OPND(numOpnds, opnds, dstData);
80108012
ADD_OPND(numOpnds, opnds, coord0s);
8013+
ADD_OPND(numOpnds, opnds, CreateOtherOpnd(coord0Offset, ISA_TYPE_D));
80118014
ADD_OPND(numOpnds, opnds, coord1s);
8015+
ADD_OPND(numOpnds, opnds, CreateOtherOpnd(coord1Offset, ISA_TYPE_D));
80128016
ADD_OPND(numOpnds, opnds, coord2s);
8017+
ADD_OPND(numOpnds, opnds, CreateOtherOpnd(coord2Offset, ISA_TYPE_D));
80138018
ADD_OPND(numOpnds, opnds, features);
80148019
ADD_OPND(numOpnds, opnds, src1Data);
80158020
ADD_OPND(numOpnds, opnds, src2Data);

visa/VisaToG4/TranslateSendLdStLsc.cpp

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -795,8 +795,10 @@ int IR_Builder::translateLscTypedInst(
795795
LSC_ADDR_SIZE addrSize, LSC_DATA_SHAPE shape,
796796
G4_Operand *surface, // surface/bti
797797
G4_DstRegRegion *dstData, // dst on load/atomic
798-
G4_SrcRegRegion *coord0s, G4_SrcRegRegion *coord1s,
799-
G4_SrcRegRegion *coord2s, G4_SrcRegRegion *features,
798+
G4_SrcRegRegion *coord0s, int uOff,
799+
G4_SrcRegRegion *coord1s, int vOff,
800+
G4_SrcRegRegion *coord2s, int rOff,
801+
G4_SrcRegRegion *features,
800802
G4_SrcRegRegion *src1Data, // store data/extra atomic operands
801803
G4_SrcRegRegion *src2Data // icas/fcas only
802804
) {
@@ -860,6 +862,26 @@ int IR_Builder::translateLscTypedInst(
860862
checkAddrPayloadSize("src0AddrRs", coord2s);
861863
checkAddrPayloadSize("src0Feature", features);
862864

865+
// Emulate coordinate immediate offsets that are unsupported.
// addPayloadOffset(srcCoord, coordImmOff) returns the operand to use for one
// coordinate:
//  - if coordImmOff is 0, srcCoord itself is returned and nothing is emitted;
//  - otherwise an add of (srcCoord + coordImmOff) is emitted into a fresh
//    send-payload declare and a source region over that declare is returned.
// coordImmOff is taken by reference and reset to 0 once the offset has been
// folded into the payload.
// NOTE(review): srcCoord is not checked for a null operand before the add is
// emitted; confirm callers never pass a non-zero offset with a null coordinate.
866+
auto addPayloadOffset = [&](G4_SrcRegRegion *srcCoord, int& coordImmOff) {
867+
if (coordImmOff == 0)
868+
return srcCoord; // no offset: reuse the caller's operand unchanged
869+
auto elemsPerCoord =
870+
std::max<unsigned>(numEltPerGRF<Type_D>(), (unsigned)execSize); // the larger of numEltPerGRF<Type_D>() and execSize
871+
G4_Declare *srcCoordCopy = createSendPayloadDcl(elemsPerCoord, Type_D); // temporary that receives coord + offset
872+
srcCoordCopy->setEvenAlign();
873+
G4_DstRegRegion *dst = createDstRegRegion(srcCoordCopy, 1);
874+
G4_Imm *imm = createImm(coordImmOff, Type_D); // the offset as a signed dword immediate
875+
G4_Predicate *pr = duplicateOperand(pred); // the add gets its own copy of the enclosing pred
876+
createBinOp(pr, G4_add, execSize, dst, srcCoord, imm, instOpt, true); // srcCoordCopy = srcCoord + imm
877+
coordImmOff = 0; // offset is now baked into the payload; clear the caller's copy
878+
return createSrcRegRegion(srcCoordCopy, getRegionStride1()); // stride-1 source region over the adjusted copy
879+
}; // addPayloadOffset
880+
881+
coord0s = addPayloadOffset(coord0s, uOff);
882+
coord1s = addPayloadOffset(coord1s, vOff);
883+
coord2s = addPayloadOffset(coord2s, rOff);
884+
863885
PayloadSource srcAddrPayloads[4]{}; // U, V, R, feature
864886
unsigned numSrcAddrPayloads = 0;
865887
buildTypedSurfaceAddressPayload(coord0s, coord1s, coord2s,

0 commit comments

Comments
 (0)