@@ -32,9 +32,9 @@ class LatencyTableLegacy : public LatencyTable {
32
32
template <PlatformGen Gen>
33
33
class LatencyTableXe : public LatencyTable {
34
34
// Select latency information based on platform generation.
// After this change LI is XELatencyInfo only when Gen == PlatformGen::XE;
// every other generation resolves to LI2, which is currently void.
// NOTE(review): the constructor's static_assert still accepts any
// Gen >= PlatformGen::XE. If a generation above XE is ever instantiated, LI
// would be void and the LI::... lookups in the member functions would not
// compile -- confirm this is the intended placeholder behavior.
35
- using LI = typename std::conditional<Gen >= PlatformGen::XE,
36
- XELatencyInfo ,
37
- void >::type;
35
+ using LI2 = void ;
36
+ using LI = typename std::conditional<Gen == PlatformGen::XE ,
37
+ XELatencyInfo, LI2 >::type;
38
38
public:
39
39
LatencyTableXe (const IR_Builder& builder) : LatencyTable(builder) {
40
40
static_assert (Gen >= PlatformGen::XE);
@@ -50,11 +50,14 @@ class LatencyTableXe: public LatencyTable {
50
50
uint16_t getDPASLatency (uint8_t repeatCount) const override ;
51
51
private:
52
52
// Private per-category latency helpers. getLatency() calls the math, branch,
// intrinsic, DPAS, ARF-access and arithmetic helpers; getMsgLatency() calls
// the sampler, DP and LSC L3 helpers declared just below it.
uint16_t getMsgLatency (const G4_INST *Inst) const ;
53
// Returns the LI::SAMPLER_L3 entry; used by getMsgLatency() for sampler messages.
+ uint16_t getSamplerLatency () const ;
54
// Returns the LI::DP_L3 entry; used by getMsgLatency() for HDC messages.
+ uint16_t getDPL3Latency () const ;
55
// Returns LI::LSC_TYPED_L3 when typed is true, LI::LSC_UNTYPED_L3 otherwise.
+ uint16_t getLSCL3Latency (bool typed) const ;
53
56
uint16_t getMathLatency (const G4_INST *inst) const ;
54
// The instruction parameter is dropped by this change: the definition only
// returns the LI::BRANCH entry and no longer inspects the instruction.
- uint16_t getBranchLatency (const G4_INST *inst ) const ;
57
+ uint16_t getBranchLatency () const ;
55
58
uint16_t getIntrinsicLatency (const G4_INST *inst) const ;
56
59
uint16_t getDPASLatency (const G4_InstDpas *dpas) const ;
57
// The instruction parameter is dropped by this change: the definition only
// returns the LI::ARF entry and no longer inspects the instruction.
- uint16_t getARFAccessLatency (const G4_INST *inst ) const ;
60
+ uint16_t getARFAccessLatency () const ;
58
61
uint16_t getArithmeticLatency (const G4_INST *inst) const ;
59
62
};
60
63
@@ -161,14 +164,14 @@ uint16_t LatencyTableXe<Gen>::getLatency(const G4_INST *Inst) const {
161
164
if (Inst->isMath ())
162
165
return getMathLatency (Inst);
163
166
if (Inst->isFlowControl ())
164
- return getBranchLatency (Inst );
167
+ return getBranchLatency ();
165
168
if (Inst->isIntrinsic ())
166
169
return getIntrinsicLatency (Inst);
167
170
if (Inst->isDpas ())
168
171
return getDPASLatency (Inst->asDpasInst ());
169
172
if (Inst->writesFlag () ||
170
173
(Inst->getDst () && Inst->getDst ()->isDirectA0 ()))
171
- return getARFAccessLatency (Inst );
174
+ return getARFAccessLatency ();
172
175
if (Inst->isArithmetic ())
173
176
return getArithmeticLatency (Inst);
174
177
@@ -194,36 +197,50 @@ uint16_t LatencyTableXe<Gen>::getMsgLatency(const G4_INST *Inst) const {
194
197
bool isCachedInL1 = MsgDesc->getCachingL1 () == Caching::CA ||
195
198
(MsgDesc->getCachingL1 () != Caching::UC &&
196
199
m_builder.getOption (vISA_assumeL1Hit));
197
- if (MsgDesc->isTyped ()) {
198
- return isCachedInL1 ? value_of (LI::LSC_TYPED_L1)
199
- : value_of (LI::LSC_TYPED_L3);
200
+ bool typed = MsgDesc->isTyped ();
201
+ if (isCachedInL1) {
202
+ return typed ? value_of (LI::LSC_TYPED_L1)
203
+ : value_of (LI::LSC_UNTYPED_L1);
200
204
} else {
201
- return isCachedInL1 ? value_of (LI::LSC_UNTYPED_L1)
202
- : value_of (LI::LSC_UNTYPED_L3);
205
+ return getLSCL3Latency (typed);
203
206
}
204
207
}
205
208
}
206
209
if (MsgDesc->isSLM ())
207
210
return Inst->asSendInst ()->isFence () ? value_of (LI::SLM_FENCE)
208
211
: value_of (LI::SLM16);
209
212
if (MsgDesc->isSampler ())
210
- return value_of (LI::SAMPLER_L3 );
213
+ return getSamplerLatency ( );
211
214
if (MsgDesc->isHDC ())
212
- return value_of (LI::DP_L3 );
215
+ return getDPL3Latency ( );
213
216
if (MsgDesc->isBarrier ())
214
217
return value_of (LI::BARRIER);
215
218
return value_of (LI::SEND_OTHERS);
216
219
}
217
220
221
// Latency reported for sampler messages: the SAMPLER_L3 entry of the latency
// info type LI, passed through value_of(). getMsgLatency() calls this when
// MsgDesc->isSampler() holds; previously the same expression was inlined there.
+ template <PlatformGen Gen>
222
+ uint16_t LatencyTableXe<Gen>::getSamplerLatency() const {
223
+ return value_of (LI::SAMPLER_L3);
224
+ }
225
+
226
// Latency for an LSC message that is not treated as cached in L1.
// `typed` selects between the LSC_TYPED_L3 and LSC_UNTYPED_L3 entries of LI;
// getMsgLatency() passes MsgDesc->isTyped() for it.
+ template <PlatformGen Gen>
227
+ uint16_t LatencyTableXe<Gen>::getLSCL3Latency(bool typed) const {
228
+ return value_of (typed ? LI::LSC_TYPED_L3 : LI::LSC_UNTYPED_L3);
229
+ }
230
+
231
// Latency reported for HDC data-port messages: the DP_L3 entry of LI, passed
// through value_of(). getMsgLatency() calls this when MsgDesc->isHDC() holds.
+ template <PlatformGen Gen>
232
+ uint16_t LatencyTableXe<Gen>::getDPL3Latency() const {
233
+ return value_of (LI::DP_L3);
234
+ }
235
+
218
236
// Latency for math instructions: always the MATH entry of LI.
// Inst is only used for the assertion that the caller passed a math
// instruction; getLatency() checks Inst->isMath() before calling.
template <PlatformGen Gen>
219
237
uint16_t LatencyTableXe<Gen>::getMathLatency(const G4_INST *Inst) const {
220
238
vASSERT (Inst->isMath ());
221
239
return value_of (LI::MATH);
222
240
}
223
241
224
242
// Latency for flow-control instructions: always the BRANCH entry of LI.
// This change removes the G4_INST parameter together with the
// vASSERT(Inst->isFlowControl()) check, so the precondition is no longer
// verified here; getLatency() tests Inst->isFlowControl() before calling.
template <PlatformGen Gen>
225
- uint16_t LatencyTableXe<Gen>::getBranchLatency(const G4_INST *Inst) const {
226
- vASSERT (Inst->isFlowControl ());
243
+ uint16_t LatencyTableXe<Gen>::getBranchLatency() const {
227
244
return value_of (LI::BRANCH);
228
245
}
229
246
@@ -239,9 +256,7 @@ uint16_t LatencyTableXe<Gen>::getDPASLatency(const G4_InstDpas *dpas) const {
239
256
}
240
257
241
258
// Latency for ARF accesses: always the ARF entry of LI.
// This change removes the G4_INST parameter together with the assertion that
// the instruction writes a flag or has a direct-A0 destination; getLatency()
// evaluates that same condition before calling.
template <PlatformGen Gen>
242
- uint16_t LatencyTableXe<Gen>::getARFAccessLatency(const G4_INST *Inst) const {
243
- vASSERT (Inst->writesFlag () ||
244
- (Inst->getDst () && Inst->getDst ()->isDirectA0 ()));
259
+ uint16_t LatencyTableXe<Gen>::getARFAccessLatency() const {
245
260
return value_of (LI::ARF);
246
261
}
247
262
@@ -293,8 +308,8 @@ LatencyTableXe<PlatformGen::XE>::getDPASLatency(uint8_t repeatCount) const {
293
308
case Xe_PVCXT:
294
309
return value_of (LI::DPAS) + repeatCount;
295
310
default : // Not supported platform
296
- // TODO: Add vISA_ASSERT_UNREACHABLE.
297
- return 46 ;
311
+ vISA_ASSERT_UNREACHABLE ( " Unsupported platform " );
312
+ return value_of (LI::UNKNOWN) ;
298
313
}
299
314
}
300
315
template <>
0 commit comments