@@ -32,9 +32,9 @@ class LatencyTableLegacy : public LatencyTable {
32
32
template <PlatformGen Gen>
33
33
class LatencyTableXe : public LatencyTable {
34
34
// Select latency information based on platform generation.
35
- using LI2 = void ;
36
- using LI = typename std::conditional<Gen == PlatformGen::XE ,
37
- XELatencyInfo, LI2 >::type;
35
+ using LI = typename std::conditional<Gen >= PlatformGen::XE,
36
+ XELatencyInfo ,
37
+ void >::type;
38
38
public:
39
39
LatencyTableXe (const IR_Builder& builder) : LatencyTable(builder) {
40
40
static_assert (Gen >= PlatformGen::XE);
@@ -50,14 +50,11 @@ class LatencyTableXe: public LatencyTable {
50
50
uint16_t getDPASLatency (uint8_t repeatCount) const override ;
51
51
private:
52
52
uint16_t getMsgLatency (const G4_INST *Inst) const ;
53
- uint16_t getSamplerLatency () const ;
54
- uint16_t getDPL3Latency () const ;
55
- uint16_t getLSCL3Latency (bool typed) const ;
56
53
uint16_t getMathLatency (const G4_INST *inst) const ;
57
- uint16_t getBranchLatency () const ;
54
+ uint16_t getBranchLatency (const G4_INST *inst ) const ;
58
55
uint16_t getIntrinsicLatency (const G4_INST *inst) const ;
59
56
uint16_t getDPASLatency (const G4_InstDpas *dpas) const ;
60
- uint16_t getARFAccessLatency () const ;
57
+ uint16_t getARFAccessLatency (const G4_INST *inst ) const ;
61
58
uint16_t getArithmeticLatency (const G4_INST *inst) const ;
62
59
};
63
60
@@ -164,14 +161,14 @@ uint16_t LatencyTableXe<Gen>::getLatency(const G4_INST *Inst) const {
164
161
if (Inst->isMath ())
165
162
return getMathLatency (Inst);
166
163
if (Inst->isFlowControl ())
167
- return getBranchLatency ();
164
+ return getBranchLatency (Inst );
168
165
if (Inst->isIntrinsic ())
169
166
return getIntrinsicLatency (Inst);
170
167
if (Inst->isDpas ())
171
168
return getDPASLatency (Inst->asDpasInst ());
172
169
if (Inst->writesFlag () ||
173
170
(Inst->getDst () && Inst->getDst ()->isDirectA0 ()))
174
- return getARFAccessLatency ();
171
+ return getARFAccessLatency (Inst );
175
172
if (Inst->isArithmetic ())
176
173
return getArithmeticLatency (Inst);
177
174
@@ -197,50 +194,36 @@ uint16_t LatencyTableXe<Gen>::getMsgLatency(const G4_INST *Inst) const {
197
194
bool isCachedInL1 = MsgDesc->getCachingL1 () == Caching::CA ||
198
195
(MsgDesc->getCachingL1 () != Caching::UC &&
199
196
m_builder.getOption (vISA_assumeL1Hit));
200
- bool typed = MsgDesc->isTyped ();
201
- if (isCachedInL1) {
202
- return typed ? value_of (LI::LSC_TYPED_L1)
203
- : value_of (LI::LSC_UNTYPED_L1);
197
+ if (MsgDesc->isTyped ()) {
198
+ return isCachedInL1 ? value_of (LI::LSC_TYPED_L1)
199
+ : value_of (LI::LSC_TYPED_L3);
204
200
} else {
205
- return getLSCL3Latency (typed);
201
+ return isCachedInL1 ? value_of (LI::LSC_UNTYPED_L1)
202
+ : value_of (LI::LSC_UNTYPED_L3);
206
203
}
207
204
}
208
205
}
209
206
if (MsgDesc->isSLM ())
210
207
return Inst->asSendInst ()->isFence () ? value_of (LI::SLM_FENCE)
211
208
: value_of (LI::SLM16);
212
209
if (MsgDesc->isSampler ())
213
- return getSamplerLatency ( );
210
+ return value_of (LI::SAMPLER_L3 );
214
211
if (MsgDesc->isHDC ())
215
- return getDPL3Latency ( );
212
+ return value_of (LI::DP_L3 );
216
213
if (MsgDesc->isBarrier ())
217
214
return value_of (LI::BARRIER);
218
215
return value_of (LI::SEND_OTHERS);
219
216
}
220
217
221
- template <PlatformGen Gen>
222
- uint16_t LatencyTableXe<Gen>::getSamplerLatency() const {
223
- return value_of (LI::SAMPLER_L3);
224
- }
225
-
226
- template <PlatformGen Gen>
227
- uint16_t LatencyTableXe<Gen>::getLSCL3Latency(bool typed) const {
228
- return value_of (typed ? LI::LSC_TYPED_L3 : LI::LSC_UNTYPED_L3);
229
- }
230
-
231
- template <PlatformGen Gen>
232
- uint16_t LatencyTableXe<Gen>::getDPL3Latency() const {
233
- return value_of (LI::DP_L3);
234
- }
235
-
236
218
template <PlatformGen Gen>
237
219
uint16_t LatencyTableXe<Gen>::getMathLatency(const G4_INST *Inst) const {
238
220
vASSERT (Inst->isMath ());
239
221
return value_of (LI::MATH);
240
222
}
241
223
242
224
template <PlatformGen Gen>
243
- uint16_t LatencyTableXe<Gen>::getBranchLatency() const {
225
+ uint16_t LatencyTableXe<Gen>::getBranchLatency(const G4_INST *Inst) const {
226
+ vASSERT (Inst->isFlowControl ());
244
227
return value_of (LI::BRANCH);
245
228
}
246
229
@@ -256,7 +239,9 @@ uint16_t LatencyTableXe<Gen>::getDPASLatency(const G4_InstDpas *dpas) const {
256
239
}
257
240
258
241
template <PlatformGen Gen>
259
- uint16_t LatencyTableXe<Gen>::getARFAccessLatency() const {
242
+ uint16_t LatencyTableXe<Gen>::getARFAccessLatency(const G4_INST *Inst) const {
243
+ vASSERT (Inst->writesFlag () ||
244
+ (Inst->getDst () && Inst->getDst ()->isDirectA0 ()));
260
245
return value_of (LI::ARF);
261
246
}
262
247
@@ -308,8 +293,8 @@ LatencyTableXe<PlatformGen::XE>::getDPASLatency(uint8_t repeatCount) const {
308
293
case Xe_PVCXT:
309
294
return value_of (LI::DPAS) + repeatCount;
310
295
default : // Not supported platform
311
- vISA_ASSERT_UNREACHABLE ( " Unsupported platform " );
312
- return value_of (LI::UNKNOWN) ;
296
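+ // TODO: Add vISA_ASSERT_UNREACHABLE for unsupported platforms. NOTE(review): the literal returned below is a magic number; prefer a named LI table entry and confirm the intended fallback value.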
297
+ return 46 ;
313
298
}
314
299
}
315
300
template <>
0 commit comments