Skip to content

Commit af144e6

Browse files
jgu222 authored and igcbot committed
For the following code:
    mov (8|M0) r80.0<1>:d 0:w
    send (1|M0) r80:d r4:uq 0xC 0x041401FF
    add (1|M0) r80.1<1>:d r54.0<0;1,0>:d r80.1<0;1,0>:d
    add (1|M0) r80.2<1>:d r58.0<0;1,0>:d r80.2<0;1,0>:d
    add (1|M0) r80.3<1>:d r62.0<0;1,0>:d r80.3<0;1,0>:d
    add (1|M0) r80.4<1>:d r66.0<0;1,0>:d r80.4<0;1,0>:d
    add (1|M0) r80.5<1>:d r70.0<0;1,0>:d r80.5<0;1,0>:d
    add (1|M0) r80.6<1>:d r74.0<0;1,0>:d r80.6<0;1,0>:d
    add (1|M0) r80.7<1>:d r78.0<0;1,0>:d r80.7<0;1,0>:d

Previously, the "send" was treated as killing all of r80, which is incorrect. This change makes sure that "send (1)" kills only r80.0:d, so the first "mov" instruction is no longer considered dead.
1 parent 5afb40f commit af144e6

File tree

3 files changed

+182
-8
lines changed

3 files changed

+182
-8
lines changed

visa/G4_IR.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,7 @@ G4_InstSend::G4_InstSend(
354354
G4_INST(builder, prd, o, nullptr, g4::NOSAT, size, dst, payload, desc, opt),
355355
msgDesc(md)
356356
{
357+
md->setExecSize(size);
357358
}
358359

359360
G4_InstSend::G4_InstSend(
@@ -372,6 +373,7 @@ G4_InstSend::G4_InstSend(
372373
msgDesc(md)
373374
{
374375
setSrc(extDesc, 3);
376+
md->setExecSize(size);
375377
}
376378

377379
void G4_INST::setOpcode(G4_opcode opcd)

visa/G4_SendDescs.cpp

Lines changed: 156 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,8 @@ G4_SendDescLdSt::G4_SendDescLdSt(
168168
G4_Operand *surf,
169169
ImmOff _immOff,
170170
LdStAttrs _attrs)
171-
: G4_SendDesc(G4_SendDesc::Kind::LDST, sfid),
171+
: G4_SendDesc(G4_SendDesc::Kind::LDST, sfid, _execSize),
172172
op(_op),
173-
execSize(_execSize),
174173
//
175174
addrType(at), addrBits(_addrBits), addrDims(_addrDims),
176175
//
@@ -185,7 +184,7 @@ G4_SendDescLdSt::G4_SendDescLdSt(
185184
static size_t toExecSlots(const G4_SendDescLdSt &d)
186185
{
187186
int minExecSize = 8;
188-
int execSlots = std::max((int)d.execSize, minExecSize);
187+
int execSlots = std::max((int)d.getExecSize(), minExecSize);
189188
return (size_t)execSlots;
190189
}
191190

@@ -586,6 +585,149 @@ bool G4_SendDescRaw::is16BitReturn() const
586585
return desc.layout.returnFormat == 1;
587586
}
588587

588+
bool G4_SendDescRaw::isByteScatterRW() const
589+
{
590+
auto funcID = getSFID();
591+
switch (funcID) {
592+
case SFID::DP_DC0:
593+
switch (getHdcMessageType()) {
594+
case DC_BYTE_SCATTERED_READ:
595+
case DC_BYTE_SCATTERED_WRITE:
596+
return true;
597+
default:
598+
break;
599+
}
600+
break;
601+
case SFID::DP_DC1:
602+
switch (getHdcMessageType()) {
603+
case DC1_A64_SCATTERED_READ:
604+
case DC1_A64_SCATTERED_WRITE:
605+
return (getBlockSize() == 1);
606+
default:
607+
break;
608+
}
609+
break;
610+
case SFID::DP_DC2:
611+
switch (getHdcMessageType()) {
612+
case DC2_A64_SCATTERED_READ:
613+
case DC2_A64_SCATTERED_WRITE:
614+
return (getBlockSize() == 1);
615+
case DC2_BYTE_SCATTERED_READ:
616+
case DC2_BYTE_SCATTERED_WRITE:
617+
return true;
618+
default:
619+
break;
620+
}
621+
break;
622+
default:
623+
break;
624+
}
625+
return false;
626+
}
627+
628+
bool G4_SendDescRaw::isDWScatterRW() const
629+
{
630+
auto funcID = getSFID();
631+
switch (funcID) {
632+
case SFID::DP_DC0:
633+
switch (getHdcMessageType()) {
634+
case DC_DWORD_SCATTERED_READ:
635+
case DC_DWORD_SCATTERED_WRITE:
636+
return true;
637+
default:
638+
break;
639+
}
640+
break;
641+
case SFID::DP_DC1:
642+
switch (getHdcMessageType()) {
643+
case DC1_A64_SCATTERED_READ:
644+
case DC1_A64_SCATTERED_WRITE:
645+
return (getBlockSize() == 4);
646+
default:
647+
break;
648+
}
649+
break;
650+
case SFID::DP_DC2:
651+
switch (getHdcMessageType()) {
652+
case DC2_A64_SCATTERED_READ:
653+
case DC2_A64_SCATTERED_WRITE:
654+
return (getBlockSize() == 4);
655+
default:
656+
break;
657+
}
658+
break;
659+
default:
660+
break;
661+
}
662+
return false;
663+
}
664+
665+
bool G4_SendDescRaw::isQWScatterRW() const
666+
{
667+
auto funcID = getSFID();
668+
switch (funcID) {
669+
case SFID::DP_DC0:
670+
switch (getHdcMessageType()) {
671+
default:
672+
break;
673+
}
674+
break;
675+
case SFID::DP_DC1:
676+
switch (getHdcMessageType()) {
677+
case DC1_A64_SCATTERED_READ:
678+
case DC1_A64_SCATTERED_WRITE:
679+
return (getBlockSize() == 8);
680+
default:
681+
break;
682+
}
683+
break;
684+
case SFID::DP_DC2:
685+
switch (getHdcMessageType()) {
686+
case DC2_A64_SCATTERED_READ:
687+
case DC2_A64_SCATTERED_WRITE:
688+
return (getBlockSize() == 4);
689+
default:
690+
break;
691+
}
692+
break;
693+
default:
694+
break;
695+
}
696+
return false;
697+
}
698+
699+
bool G4_SendDescRaw::isUntypedRW() const
700+
{
701+
auto funcID = getSFID();
702+
switch (funcID) {
703+
case SFID::DP_DC1:
704+
switch (getHdcMessageType()) {
705+
case DC1_UNTYPED_SURFACE_READ:
706+
case DC1_UNTYPED_SURFACE_WRITE:
707+
case DC1_A64_UNTYPED_SURFACE_READ:
708+
case DC1_A64_UNTYPED_SURFACE_WRITE:
709+
return true;
710+
default:
711+
break;
712+
}
713+
break;
714+
case SFID::DP_DC2:
715+
switch (getHdcMessageType()) {
716+
case DC2_UNTYPED_SURFACE_READ:
717+
case DC2_UNTYPED_SURFACE_WRITE:
718+
case DC2_A64_UNTYPED_SURFACE_READ:
719+
case DC2_A64_UNTYPED_SURFACE_WRITE:
720+
return true;
721+
default:
722+
break;
723+
}
724+
break;
725+
default:
726+
break;
727+
}
728+
return false;
729+
}
730+
589731
bool G4_SendDescRaw::isA64Message() const
590732
{
591733
if (!isHDC()) {
@@ -880,6 +1022,17 @@ size_t G4_SendDescRaw::getDstLenBytes() const
8801022
return 32 * getScratchRWSize(); // HWords
8811023
} else if (isOwordLoad()) {
8821024
return 16 * getOwordsAccessed(); // OWords
1025+
} else if (isByteScatterRW()) {
1026+
uint16_t nbytes = getBlockNum();
1027+
// assume 4 at least
1028+
nbytes = (nbytes >= 4 ? nbytes : 4);
1029+
return nbytes * getExecSize();
1030+
} else if (isDWScatterRW()) {
1031+
return 4 * getBlockNum() * getExecSize();
1032+
} else if (isQWScatterRW()) {
1033+
return 8 * getBlockNum() * getExecSize();
1034+
} else if (isUntypedRW()) {
1035+
return 4 * getEnabledChannelNum() * getExecSize();
8831036
} else {
8841037
// fallback to the raw GRF count
8851038
return ResponseLength() * (size_t)getGRFSize();

visa/G4_SendDescs.hpp

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,20 +179,38 @@ static inline ElemsPerAddr::Chs operator|(
179179

180180
class G4_Operand;
181181

182-
// Base class for all send descriptors
182+
// Base class for all send descriptors.
183+
// (Note that G4_SendDesc could be reused by more than one instruction.)
183184
class G4_SendDesc
184185
{
186+
friend class G4_InstSend;
187+
188+
protected:
189+
// The execution size for this message.
190+
G4_ExecSize execSize;
191+
192+
// Limit access to G4_InstSend and any derived classes.
193+
void setExecSize(G4_ExecSize v) { execSize = v; }
194+
185195
public:
186196
enum class Kind {INVALID, RAW, LDST};
187197

188198
Kind kind;
189199

190200
SFID sfid;
191201

192-
G4_SendDesc(Kind k, SFID _sfid) : kind(k), sfid(_sfid) { }
202+
G4_SendDesc(Kind k, SFID _sfid) : kind(k), sfid(_sfid), execSize(g4::SIMD_UNDEFINED) { }
203+
G4_SendDesc(Kind k, SFID _sfid, G4_ExecSize _execSize)
204+
: kind(k),
205+
sfid(_sfid),
206+
execSize(_execSize)
207+
{}
193208

194209
SFID getSFID() const {return sfid;}
195210

211+
// execSize: need to set it in the ctor
212+
G4_ExecSize getExecSize() const { return execSize; }
213+
196214
bool isRaw() const {return kind == Kind::RAW;}
197215
bool isLdSt() const {return kind == Kind::LDST;}
198216
//
@@ -274,9 +292,6 @@ struct G4_SendDescLdSt : G4_SendDesc {
274292
// The message op
275293
LdStOp op;
276294

277-
// The execution size for this message.
278-
G4_ExecSize execSize;
279-
280295
// E.g. flat, bti, ...
281296
AddrType addrType;
282297
//
@@ -609,6 +624,10 @@ class G4_SendDescRaw : public G4_SendDesc
609624
uint16_t bitV = ((getFuncCtrl() & 0x3000u) >> 12);
610625
return 0x1 << bitV;
611626
}
627+
bool isByteScatterRW() const;
628+
bool isDWScatterRW() const;
629+
bool isQWScatterRW() const;
630+
bool isUntypedRW() const;
612631

613632
bool isA64Message() const;
614633

0 commit comments

Comments
 (0)