Skip to content

Commit 98b5f43

Browse files
petechouigcbot
authored and committed
vISA scheduler minor code change.
vISA scheduler minor code change.
1 parent 71bb3c5 commit 98b5f43

File tree

2 files changed

+41
-24
lines changed

2 files changed

+41
-24
lines changed

visa/LocalScheduler/LatencyTable.cpp

Lines changed: 36 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,9 @@ class LatencyTableLegacy : public LatencyTable {
3232
template <PlatformGen Gen>
3333
class LatencyTableXe: public LatencyTable {
3434
// Select latency information based on platform generation.
35-
using LI = typename std::conditional<Gen >= PlatformGen::XE,
36-
XELatencyInfo,
37-
void>::type;
35+
using LI2 = void;
36+
using LI = typename std::conditional<Gen == PlatformGen::XE,
37+
XELatencyInfo, LI2>::type;
3838
public:
3939
LatencyTableXe(const IR_Builder& builder) : LatencyTable(builder) {
4040
static_assert(Gen >= PlatformGen::XE);
@@ -50,11 +50,14 @@ class LatencyTableXe: public LatencyTable {
5050
uint16_t getDPASLatency(uint8_t repeatCount) const override;
5151
private:
5252
uint16_t getMsgLatency(const G4_INST *Inst) const;
53+
uint16_t getSamplerLatency() const;
54+
uint16_t getDPL3Latency() const;
55+
uint16_t getLSCL3Latency(bool typed) const;
5356
uint16_t getMathLatency(const G4_INST *inst) const;
54-
uint16_t getBranchLatency(const G4_INST *inst) const;
57+
uint16_t getBranchLatency() const;
5558
uint16_t getIntrinsicLatency(const G4_INST *inst) const;
5659
uint16_t getDPASLatency(const G4_InstDpas *dpas) const;
57-
uint16_t getARFAccessLatency(const G4_INST *inst) const;
60+
uint16_t getARFAccessLatency() const;
5861
uint16_t getArithmeticLatency(const G4_INST *inst) const;
5962
};
6063

@@ -161,14 +164,14 @@ uint16_t LatencyTableXe<Gen>::getLatency(const G4_INST *Inst) const {
161164
if (Inst->isMath())
162165
return getMathLatency(Inst);
163166
if (Inst->isFlowControl())
164-
return getBranchLatency(Inst);
167+
return getBranchLatency();
165168
if (Inst->isIntrinsic())
166169
return getIntrinsicLatency(Inst);
167170
if (Inst->isDpas())
168171
return getDPASLatency(Inst->asDpasInst());
169172
if (Inst->writesFlag() ||
170173
(Inst->getDst() && Inst->getDst()->isDirectA0()))
171-
return getARFAccessLatency(Inst);
174+
return getARFAccessLatency();
172175
if (Inst->isArithmetic())
173176
return getArithmeticLatency(Inst);
174177

@@ -194,36 +197,50 @@ uint16_t LatencyTableXe<Gen>::getMsgLatency(const G4_INST *Inst) const {
194197
bool isCachedInL1 = MsgDesc->getCachingL1() == Caching::CA ||
195198
(MsgDesc->getCachingL1() != Caching::UC &&
196199
m_builder.getOption(vISA_assumeL1Hit));
197-
if (MsgDesc->isTyped()) {
198-
return isCachedInL1 ? value_of(LI::LSC_TYPED_L1)
199-
: value_of(LI::LSC_TYPED_L3);
200+
bool typed = MsgDesc->isTyped();
201+
if (isCachedInL1) {
202+
return typed ? value_of(LI::LSC_TYPED_L1)
203+
: value_of(LI::LSC_UNTYPED_L1);
200204
} else {
201-
return isCachedInL1 ? value_of(LI::LSC_UNTYPED_L1)
202-
: value_of(LI::LSC_UNTYPED_L3);
205+
return getLSCL3Latency(typed);
203206
}
204207
}
205208
}
206209
if (MsgDesc->isSLM())
207210
return Inst->asSendInst()->isFence() ? value_of(LI::SLM_FENCE)
208211
: value_of(LI::SLM16);
209212
if (MsgDesc->isSampler())
210-
return value_of(LI::SAMPLER_L3);
213+
return getSamplerLatency();
211214
if (MsgDesc->isHDC())
212-
return value_of(LI::DP_L3);
215+
return getDPL3Latency();
213216
if (MsgDesc->isBarrier())
214217
return value_of(LI::BARRIER);
215218
return value_of(LI::SEND_OTHERS);
216219
}
217220

221+
template<PlatformGen Gen>
222+
uint16_t LatencyTableXe<Gen>::getSamplerLatency() const {
223+
return value_of(LI::SAMPLER_L3);
224+
}
225+
226+
template<PlatformGen Gen>
227+
uint16_t LatencyTableXe<Gen>::getLSCL3Latency(bool typed) const {
228+
return value_of(typed ? LI::LSC_TYPED_L3 : LI::LSC_UNTYPED_L3);
229+
}
230+
231+
template<PlatformGen Gen>
232+
uint16_t LatencyTableXe<Gen>::getDPL3Latency() const {
233+
return value_of(LI::DP_L3);
234+
}
235+
218236
template<PlatformGen Gen>
219237
uint16_t LatencyTableXe<Gen>::getMathLatency(const G4_INST *Inst) const {
220238
vASSERT(Inst->isMath());
221239
return value_of(LI::MATH);
222240
}
223241

224242
template<PlatformGen Gen>
225-
uint16_t LatencyTableXe<Gen>::getBranchLatency(const G4_INST *Inst) const {
226-
vASSERT(Inst->isFlowControl());
243+
uint16_t LatencyTableXe<Gen>::getBranchLatency() const {
227244
return value_of(LI::BRANCH);
228245
}
229246

@@ -239,9 +256,7 @@ uint16_t LatencyTableXe<Gen>::getDPASLatency(const G4_InstDpas *dpas) const {
239256
}
240257

241258
template<PlatformGen Gen>
242-
uint16_t LatencyTableXe<Gen>::getARFAccessLatency(const G4_INST *Inst) const {
243-
vASSERT(Inst->writesFlag() ||
244-
(Inst->getDst() && Inst->getDst()->isDirectA0()));
259+
uint16_t LatencyTableXe<Gen>::getARFAccessLatency() const {
245260
return value_of(LI::ARF);
246261
}
247262

@@ -293,8 +308,8 @@ LatencyTableXe<PlatformGen::XE>::getDPASLatency(uint8_t repeatCount) const {
293308
case Xe_PVCXT:
294309
return value_of(LI::DPAS) + repeatCount;
295310
default: // Not supported platform
296-
// TODO: Add vISA_ASSERT_UNREACHABLE.
297-
return 46;
311+
vISA_ASSERT_UNREACHABLE("Unsupported platform");
312+
return value_of(LI::UNKNOWN);
298313
}
299314
}
300315
template<>

visa/LocalScheduler/SWSB_G4IR.h

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -957,7 +957,7 @@ class SWSB {
957957
const unsigned tokenAfterWriteSendSlmCycle;
958958
const unsigned tokenAfterWriteSendMemoryCycle;
959959
const unsigned tokenAfterWriteSendSamplerCycle;
960-
int tokenAfterDPASCycle;
960+
int tokenAfterDPASCycle = 0;
961961

962962
// For profiling
963963
uint32_t syncInstCount = 0;
@@ -1156,8 +1156,10 @@ class SWSB {
11561156
indexes.longIndex = 0;
11571157
indexes.DPASIndex = 0;
11581158
indexes.mathIndex = 0;
1159-
tokenAfterDPASCycle =
1160-
LatencyTable::createLatencyTable(*k.fg.builder)->getDPASLatency(8);
1159+
if (k.fg.builder->hasDPAS()) {
1160+
tokenAfterDPASCycle =
1161+
LatencyTable::createLatencyTable(*k.fg.builder)->getDPASLatency(8);
1162+
}
11611163
}
11621164
~SWSB() {}
11631165
void SWSBGenerator();

0 commit comments

Comments
 (0)