Skip to content

Commit ba8198d

Browse files
weiyu-chen authored and sys_zuul committed
Fix source region generation for HWConformity's insertMovBefore().
Change-Id: Ic31fba2e8e20a15b1e0dc8467004a79d7ceadaa6
1 parent 776f6cf commit ba8198d

File tree

2 files changed

+20
-31
lines changed

2 files changed

+20
-31
lines changed

visa/Gen4_IR.hpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1522,7 +1522,10 @@ struct RegionDesc
15221522
const uint16_t width;
15231523
const uint16_t horzStride;
15241524

1525-
RegionDesc(uint16_t vs, uint16_t w, uint16_t hs) : vertStride(vs), width(w), horzStride(hs) {}
1525+
RegionDesc(uint16_t vs, uint16_t w, uint16_t hs) : vertStride(vs), width(w), horzStride(hs)
1526+
{
1527+
assert(isLegal() && "illegal region desc");
1528+
}
15261529
void* operator new(size_t sz, vISA::Mem_Manager& m) {return m.alloc(sz);}
15271530

15281531
// The legal values for Width are {1, 2, 4, 8, 16}.

visa/HWConformity.cpp

Lines changed: 16 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -310,58 +310,44 @@ G4_Operand* HWConformity::insertMovBefore(
310310
{
311311
G4_INST* inst = *it;
312312
G4_SubReg_Align subAlign;
313-
const RegionDesc* region = NULL;
314-
unsigned short vs = 0, hs = 0, wd = 1;
313+
const RegionDesc* region = nullptr;
315314
unsigned char exec_size = inst->getExecSize();
316315
G4_Operand *src = inst->getSrc( srcNum );
317-
unsigned short scale = IS_BTYPE( src->getType() ) && src->getType() == type ? 2 : 1;
316+
unsigned short scale = IS_BTYPE(src->getType()) && src->getType() == type ? 2 : 1;
318317

319-
uint8_t newExecSize = ((src->isImm() && !IS_VTYPE(src->getType())) ||
320-
(src->isSrcRegRegion() && src->asSrcRegRegion()->isScalar()))
321-
? 1 : exec_size;
318+
uint8_t newExecSize = (src->isImm() && !IS_VTYPE(src->getType())) ||
319+
(src->isSrcRegRegion() && src->asSrcRegRegion()->isScalar())
320+
? 1 : exec_size;
322321

323-
if( newExecSize > 1 )
322+
if (newExecSize > 1)
324323
{
325324
if (scale == 1 && !IS_VTYPE(src->getType()))
326325
{
327-
scale = (unsigned short) (G4_Type_Table[src->getType()].byteSize / G4_Type_Table[type].byteSize);
326+
scale = (uint16_t)(getTypeSize(src->getType()) / getTypeSize(type));
328327
}
329-
if( scale == 0 )
328+
if (scale == 0)
330329
{
331330
scale = 1;
332331
}
333-
hs = scale;
334-
if( isCompressedInst(inst) || G4_Type_Table[type].byteSize * exec_size * hs > G4_GRF_REG_NBYTES )
335-
{
336-
wd = exec_size / 2;
337-
}
338-
else
339-
{
340-
wd = exec_size;
341-
}
342-
vs = wd * hs;
332+
region = builder.createRegionDesc(scale, 1, 0);
343333
}
344334
else
345335
{
346-
vs = 0;
347-
wd = 1;
348-
hs = 0;
349-
scale = (unsigned short)(G4_Type_Table[src->getType()].byteSize / G4_Type_Table[type].byteSize);
336+
scale = (uint16_t)(getTypeSize(src->getType()) / getTypeSize(type));
350337
if (scale == 0)
351338
{
352339
scale = 1;
353340
}
341+
region = builder.getRegionScalar();
354342
}
355343

356-
region = builder.createRegionDesc(vs, wd, hs);
357-
358344
int opExecWidthBytes = IS_VINTTYPE(src->getType()) ?
359-
G4_GRF_REG_NBYTES/2 * ( exec_size > 8 ? exec_size/8 : 1 ) :
360-
( src->getType() == Type_VF ?
361-
G4_GRF_REG_NBYTES/2 * ( exec_size > 4 ? exec_size/4 : 1 ) :
362-
newExecSize * G4_Type_Table[type].byteSize * scale );
345+
G4_GRF_REG_NBYTES / 2 * (exec_size > 8 ? exec_size / 8 : 1) :
346+
(src->getType() == Type_VF ?
347+
G4_GRF_REG_NBYTES / 2 * (exec_size > 4 ? exec_size / 4 : 1) :
348+
newExecSize * getTypeSize(type) * scale);
363349

364-
subAlign = getDclAlignment( opExecWidthBytes, inst, newExecSize == 1);
350+
subAlign = getDclAlignment(opExecWidthBytes, inst, newExecSize == 1);
365351

366352
if (subAlign < tmpAlign)
367353
{

0 commit comments

Comments
 (0)