Skip to content

Commit 135beb2

Browse files
committed
[Autobackout][Manual]Revert of change: 8b0b9d1
Refactor alignment code. Even align variables with size between 1-2GRF. Variables > 2GRF use weak edges. HSD/Radar: n/a Change-Id: I391082c7ee5f68655d9272994e76576e00f18477
1 parent 1cb3a29 commit 135beb2

File tree

5 files changed

+37
-181
lines changed

5 files changed

+37
-181
lines changed

visa/FlowGraph.cpp

Lines changed: 1 addition & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -4412,102 +4412,6 @@ void GlobalOpndHashTable::dump()
44124412
}
44134413
}
44144414

4415-
void G4_Kernel::computeChannelSlicing()
4416-
{
4417-
std::unordered_set<G4_Declare*> skipSendDcls;
4418-
unsigned int simdSize = getSimdSize();
4419-
channelSliced = true;
4420-
4421-
if (simdSize == 8 || simdSize == 16)
4422-
{
4423-
// SIMD8/16 kernels are not sliced
4424-
channelSliced = false;
4425-
return;
4426-
}
4427-
4428-
for (auto bb : fg)
4429-
{
4430-
for (auto inst : bb->getInstList())
4431-
{
4432-
if (inst->isPseudoKill() || inst->isWriteEnableInst())
4433-
continue;
4434-
4435-
if (inst->isSend())
4436-
{
4437-
auto dst = inst->getDst();
4438-
if (dst && dst->isDstRegRegion())
4439-
skipSendDcls.insert(dst->getTopDcl());
4440-
4441-
auto src = inst->getSrc(0);
4442-
if (src && src->isSrcRegRegion())
4443-
skipSendDcls.insert(src->getTopDcl());
4444-
4445-
src = inst->getSrc(1);
4446-
if (src && src->isSrcRegRegion())
4447-
skipSendDcls.insert(src->getTopDcl());
4448-
}
4449-
}
4450-
}
4451-
4452-
// .dcl V1 size = 128 bytes
4453-
// op (16|M0) V1(0,0) ..
4454-
// op (16|M16) V1(2,0) ..
4455-
// For above sequence, return 32. Instruction
4456-
// is broken into 2 only due to hw restriction.
4457-
// Allocation of dcl is still as if it were a
4458-
// SIMD32 kernel.
4459-
4460-
// dcl -> lb, rb, emask offset
4461-
std::unordered_map<G4_Declare*, std::vector<std::tuple<unsigned int, unsigned int, unsigned int>>> defaultDefs;
4462-
for (auto bb : fg)
4463-
{
4464-
for (auto inst : bb->getInstList())
4465-
{
4466-
auto dst = inst->getDst();
4467-
if (!dst || !dst->isDstRegRegion() || !dst->getTopDcl() ||
4468-
skipSendDcls.find(dst->getTopDcl()) != skipSendDcls.end() ||
4469-
dst->asDstRegRegion()->getHorzStride() != 1)
4470-
continue;
4471-
4472-
auto regFileKind = dst->getTopDcl()->getRegFile();
4473-
if (regFileKind != G4_RegFileKind::G4_GRF && regFileKind != G4_RegFileKind::G4_INPUT)
4474-
continue;
4475-
4476-
auto dstElemSize = G4_Type_Table[dst->getType()].byteSize;
4477-
4478-
if (dst->getTopDcl()->getByteSize() <= dstElemSize * simdSize)
4479-
continue;
4480-
4481-
std::vector<std::tuple<unsigned int, unsigned int, unsigned int>> v =
4482-
{ std::make_tuple(dst->getLeftBound(), dst->getRightBound(), inst->getMaskOffset()) };
4483-
defaultDefs.insert(std::make_pair(dst->getTopDcl(), v));
4484-
}
4485-
}
4486-
4487-
for (auto dd : defaultDefs)
4488-
{
4489-
auto elemSize = dd.first->getElemSize();
4490-
for (auto defs : dd.second)
4491-
{
4492-
auto lb = std::get<0>(defs);
4493-
auto rb = std::get<1>(defs);
4494-
auto emaskOffset = std::get<2>(defs);
4495-
4496-
// Look for single instruction
4497-
if (emaskOffset == 0 && lb == 0 && rb == elemSize * 32)
4498-
channelSliced = false;
4499-
// Or broken instruction
4500-
if (emaskOffset == 16 && lb == elemSize * 16 && rb == elemSize * 32)
4501-
channelSliced = false;
4502-
}
4503-
4504-
if (!channelSliced)
4505-
break;
4506-
}
4507-
4508-
return;
4509-
}
4510-
45114415
void G4_Kernel::calculateSimdSize()
45124416
{
45134417
// Iterate over all instructions in kernel to check
@@ -4535,19 +4439,15 @@ void G4_Kernel::calculateSimdSize()
45354439
if (size > 16)
45364440
{
45374441
simdSize = 32;
4538-
break;
4442+
return;
45394443
}
45404444
else if (size > 8)
45414445
{
45424446
simdSize = 16;
45434447
}
45444448
}
45454449
}
4546-
if (simdSize == 32)
4547-
break;
45484450
}
4549-
4550-
computeChannelSlicing();
45514451
}
45524452

45534453
void G4_Kernel::dump() const

visa/FlowGraph.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1297,7 +1297,6 @@ class G4_Kernel
12971297
const char* name;
12981298
unsigned numRegTotal;
12991299
unsigned int simdSize;
1300-
bool channelSliced = true;
13011300
bool hasAddrTaken;
13021301
Options *m_options;
13031302

@@ -1441,11 +1440,8 @@ class G4_Kernel
14411440

14421441
Options *getOptions(){ return m_options; }
14431442
bool getOption(vISAOptions opt) const { return m_options->getOption(opt); }
1444-
void computeChannelSlicing();
14451443
void calculateSimdSize();
14461444
unsigned int getSimdSize() { return simdSize; }
1447-
bool getChannelSlicing() { return channelSliced; }
1448-
unsigned int getSimdSizeWithSlicing() { return channelSliced ? simdSize/2 : simdSize; }
14491445

14501446
void setHasAddrTaken(bool val) { hasAddrTaken = val; }
14511447
bool getHasAddrTaken() { return hasAddrTaken; }

visa/GraphColor.cpp

Lines changed: 35 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -2228,48 +2228,34 @@ void GlobalRA::updateSubRegAlignment(G4_SubReg_Align subAlign)
22282228
}
22292229
}
22302230

2231-
bool GlobalRA::evenAlignNeeded(G4_Declare* dcl)
2232-
{
2233-
// Return true if even alignment is needed
2234-
// Even align needed if for given SIMD size and elem type,
2235-
// a complete def uses between 1-2 GRFs.
2236-
auto kernelSimdSizeToUse = kernel.getSimdSizeWithSlicing();
2237-
G4_Declare* topdcl = dcl->getRootDeclare();
2238-
auto topdclAugMask = getAugmentationMask(topdcl);
2239-
2240-
if (!areAllDefsNoMask(topdcl) && !topdcl->getIsPartialDcl() &&
2241-
topdclAugMask != AugmentationMasks::NonDefault)
2242-
{
2243-
auto elemSizeToUse = topdcl->getElemSize();
2244-
if (elemSizeToUse < 4 && topdclAugMask == AugmentationMasks::Default32Bit)
2245-
// :uw with hstride 2 can also be Default32Bit and hence needs even alignment
2246-
elemSizeToUse = 4;
2247-
else if (elemSizeToUse < 8 && topdclAugMask == AugmentationMasks::Default64Bit)
2248-
elemSizeToUse = 8;
2249-
2250-
if (// Even align if size is between 1-2 GRFs, for >2GRF sizes use weak edges
2251-
(elemSizeToUse * kernelSimdSizeToUse) > (unsigned int)GENX_GRF_REG_SIZ &&
2252-
(elemSizeToUse * kernelSimdSizeToUse) <= (unsigned int)(2 * GENX_GRF_REG_SIZ) &&
2253-
!(kernel.fg.builder->getOption(vISA_enablePreemption) &&
2254-
dcl == kernel.fg.builder->getBuiltinR0()))
2255-
{
2256-
return true;
2257-
}
2258-
}
2259-
return false;
2260-
}
2261-
22622231
// This function can be invoked before local RA or after augmentation.
2232+
// When invoked before local RA, it sets all vars to be Even aligned,
2233+
// including NoMask ones. This is safe, but conservative. Post
2234+
// augmentation, dcl masks are available so only non-NoMask vars will
2235+
// be Even aligned. Others will be Either aligned. There is no need
2236+
// to store old value of align because HW has no restriction on
2237+
// even/odd alignment that HW conformity computes.
22632238
void GlobalRA::evenAlign()
22642239
{
22652240
// Update alignment of all GRF declares to align
22662241
for (auto dcl : kernel.Declares)
22672242
{
22682243
if (dcl->getRegFile() & G4_GRF)
22692244
{
2270-
if (evenAlignNeeded(dcl))
2245+
G4_Declare* topdcl = dcl->getRootDeclare();
2246+
auto topdclAugMask = getAugmentationMask(topdcl);
2247+
2248+
if (!areAllDefsNoMask(topdcl) && !topdcl->getIsPartialDcl() &&
2249+
topdclAugMask != AugmentationMasks::NonDefault &&
2250+
topdclAugMask != AugmentationMasks::Default64Bit)
22712251
{
2272-
setEvenAligned(dcl, true);
2252+
if ((topdcl->getElemSize() >= 4 || topdclAugMask == AugmentationMasks::Default32Bit) &&
2253+
topdcl->getByteSize() >= GENX_GRF_REG_SIZ &&
2254+
!(kernel.fg.builder->getOption(vISA_enablePreemption) &&
2255+
dcl == kernel.fg.builder->getBuiltinR0()))
2256+
{
2257+
setEvenAligned(dcl, true);
2258+
}
22732259
}
22742260
}
22752261
}
@@ -3127,7 +3113,9 @@ bool Augmentation::markNonDefaultMaskDef()
31273113
prevAugMask = gra.getAugmentationMask(dcl);
31283114
}
31293115

3130-
if (gra.evenAlignNeeded(dcl))
3116+
if (liveAnalysis.livenessClass(G4_GRF) &&
3117+
gra.getAugmentationMask(dcl) == AugmentationMasks::Default32Bit &&
3118+
kernel.getSimdSize() > NUM_DWORDS_PER_GRF)
31313119
{
31323120
auto dclLR = gra.getLocalLR(dcl);
31333121
if (dclLR)
@@ -3136,7 +3124,7 @@ bool Augmentation::markNonDefaultMaskDef()
31363124
auto phyReg = dclLR->getPhyReg(s);
31373125
if (phyReg && phyReg->asGreg()->getRegNum() % 2 != 0)
31383126
{
3139-
// If LRA assignment is not 2GRF aligned then
3127+
// If LRA assignment is not 2GRF aligned for SIMD16 then
31403128
// mark it as non-default. GRA candidates cannot fully
31413129
// overlap with such ranges. Partial overlap is illegal.
31423130
gra.setAugmentationMask(dcl, AugmentationMasks::NonDefault);
@@ -4178,22 +4166,6 @@ bool Interference::isStrongEdgeBetween(G4_Declare* dcl1, G4_Declare* dcl2)
41784166
return false;
41794167
}
41804168

4181-
bool Augmentation::weakEdgeNeeded(AugmentationMasks m)
4182-
{
4183-
// Weak edge needed in case #GRF exceeds 2
4184-
4185-
if (m == AugmentationMasks::Default64Bit)
4186-
return (G4_Type_Table[Type_Q].byteSize*kernel.getSimdSizeWithSlicing()) > (unsigned int)(2 * GENX_GRF_REG_SIZ);
4187-
4188-
if (m == AugmentationMasks::Default32Bit)
4189-
{
4190-
// Even align up to 2 GRFs size variable, use weak edges beyond
4191-
return (G4_Type_Table[Type_D].byteSize*kernel.getSimdSizeWithSlicing()) > (unsigned int)(2 * GENX_GRF_REG_SIZ);
4192-
}
4193-
4194-
return false;
4195-
}
4196-
41974169
//
41984170
// Mark interference between newDcl and other incompatible dcls in current active lists.
41994171
//
@@ -4211,8 +4183,10 @@ void Augmentation::buildSIMDIntfDcl(G4_Declare* newDcl, bool isCall)
42114183
{
42124184
if (liveAnalysis.livenessClass(G4_GRF) &&
42134185
// Populate compatible sparse intf data structure
4214-
// only for weak edges.
4215-
weakEdgeNeeded(newDclAugMask))
4186+
// only for 64-bit types since others can be
4187+
// handled using Even align.
4188+
gra.getAugmentationMask(defaultDcl) == AugmentationMasks::Default64Bit &&
4189+
newDclAugMask == AugmentationMasks::Default64Bit)
42164190
{
42174191
if (defaultDcl->getRegVar()->isPhyRegAssigned() &&
42184192
newDcl->getRegVar()->isPhyRegAssigned())
@@ -4428,7 +4402,7 @@ void Augmentation::augmentIntfGraph()
44284402

44294403
if (liveAnalysis.livenessClass(G4_GRF))
44304404
{
4431-
if (kernel.getSimdSize() >= NUM_DWORDS_PER_GRF)
4405+
if (kernel.getSimdSize() > NUM_DWORDS_PER_GRF)
44324406
{
44334407
// Set alignment of all GRF candidates
44344408
// to 2GRF except for NoMask variables
@@ -10570,9 +10544,9 @@ void VerifyAugmentation::verifyAlign(G4_Declare* dcl)
1057010544
if (it == masks.end())
1057110545
return;
1057210546

10573-
if (dcl->getByteSize() >= NUM_DWORDS_PER_GRF * G4_Type_Table[Type_UD].byteSize &&
10574-
dcl->getByteSize() <= 2 * NUM_DWORDS_PER_GRF * G4_Type_Table[Type_UD].byteSize &&
10575-
kernel->getSimdSize() > NUM_DWORDS_PER_GRF)
10547+
auto dclMask = std::get<1>((*it).second);
10548+
10549+
if (dclMask == AugmentationMasks::Default32Bit)
1057610550
{
1057710551
auto assignment = dcl->getRegVar()->getPhyReg();
1057810552
if (assignment && assignment->isGreg())
@@ -10668,14 +10642,6 @@ void VerifyAugmentation::labelBBs()
1066810642
#endif
1066910643
}
1067010644

10671-
unsigned int getGRFBaseOffset(G4_Declare* dcl)
10672-
{
10673-
unsigned int regNum = dcl->getRegVar()->getPhyReg()->asGreg()->getRegNum();
10674-
unsigned int regOff = dcl->getRegVar()->getPhyRegOff();
10675-
auto type = dcl->getElemType();
10676-
return (regNum * G4_GRF_REG_NBYTES) + (regOff * getTypeSize(type));
10677-
}
10678-
1067910645
bool VerifyAugmentation::interfereBetween(G4_Declare* dcl1, G4_Declare* dcl2)
1068010646
{
1068110647
bool interferes = true;
@@ -10744,8 +10710,8 @@ bool VerifyAugmentation::interfereBetween(G4_Declare* dcl1, G4_Declare* dcl2)
1074410710

1074510711
if (lr1->getAssigned() && lr2->getAssigned())
1074610712
{
10747-
auto preg1Start = getGRFBaseOffset(dcl1);
10748-
auto preg2Start = getGRFBaseOffset(dcl2);
10713+
auto preg1Start = dcl1->getGRFBaseOffset();
10714+
auto preg2Start = dcl2->getGRFBaseOffset();
1074910715
auto preg1End = preg1Start + dcl1->getByteSize();
1075010716
auto preg2End = preg2Start + dcl2->getByteSize();
1075110717

@@ -10790,8 +10756,8 @@ void VerifyAugmentation::verify()
1079010756
{
1079110757
if (dcl1->getRegFile() == G4_RegFileKind::G4_GRF && dcl2->getRegFile() == G4_RegFileKind::G4_GRF)
1079210758
{
10793-
auto preg1Start = getGRFBaseOffset(dcl1);
10794-
auto preg2Start = getGRFBaseOffset(dcl2);
10759+
auto preg1Start = dcl1->getGRFBaseOffset();
10760+
auto preg2Start = dcl2->getGRFBaseOffset();
1079510761
auto preg1End = preg1Start + dcl1->getByteSize();
1079610762
auto preg2End = preg2Start + dcl2->getByteSize();
1079710763

@@ -10857,9 +10823,6 @@ void VerifyAugmentation::verify()
1085710823
{
1085810824
bool interfere = interfereBetween(activeDcl, dcl);
1085910825

10860-
if (activeDcl->getIsPartialDcl() || dcl->getIsPartialDcl())
10861-
continue;
10862-
1086310826
if (!interfere)
1086410827
{
1086510828
std::cerr << dcl->getRegVar()->getName() << "(" << getStr(dclMask) << ") and " << activeDcl->getRegVar()->getName() << "(" <<

visa/GraphColor.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,6 @@ namespace vISA
310310
void buildSIMDIntfDcl(G4_Declare* newDcl, bool isCall);
311311
void buildSIMDIntfAll(G4_Declare* newDcl);
312312
void handleSIMDIntf(G4_Declare* firstDcl, G4_Declare* secondDcl, bool isCall);
313-
bool weakEdgeNeeded(AugmentationMasks);
314313

315314
public:
316315
Augmentation(G4_Kernel& k, Interference& i, LivenessAnalysis& l, LiveRange* ranges[], GlobalRA& g);
@@ -1202,9 +1201,7 @@ namespace vISA
12021201
static uint32_t getRefCount(int loopNestLevel);
12031202
bool isReRAPass();
12041203
void updateSubRegAlignment(G4_SubReg_Align subAlign);
1205-
bool isChannelSliced();
12061204
void evenAlign();
1207-
bool evenAlignNeeded(G4_Declare*);
12081205
void getBankAlignment(LiveRange* lr, BankAlign &align);
12091206
void printLiveIntervals();
12101207
void reportUndefinedUses(LivenessAnalysis& liveAnalysis, G4_BB* bb, G4_INST* inst, G4_Declare* referencedDcl, std::set<G4_Declare*>& defs, std::ofstream& optreport, Gen4_Operand_Number opndNum);

visa/LocalRA.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ void LocalRA::evenAlign()
130130
{
131131
if (kernel.getOptions()->getTarget() == VISA_3D && kernel.fg.size() > 2)
132132
{
133-
if (kernel.getSimdSize() >= NUM_DWORDS_PER_GRF)
133+
if (kernel.getSimdSize() >= 16)
134134
{
135135
// Set alignment of all GRF candidates
136136
// to 2GRF except for NoMask variables

0 commit comments

Comments
 (0)