Skip to content

Commit 8b0b9d1

Browse files
pratikashar authored and Zuul committed
Refactor alignment code. Even align variables with size between 1-2GRF. Variables > 2GRF use weak edges.
Change-Id: I301bb859afd07d67772f3213e9cc3f1dd9c11c85
1 parent b38cd84 commit 8b0b9d1

File tree

5 files changed

+181
-37
lines changed

5 files changed

+181
-37
lines changed

visa/FlowGraph.cpp

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4413,6 +4413,102 @@ void GlobalOpndHashTable::dump()
44134413
}
44144414
}
44154415

4416+
void G4_Kernel::computeChannelSlicing()
4417+
{
4418+
std::unordered_set<G4_Declare*> skipSendDcls;
4419+
unsigned int simdSize = getSimdSize();
4420+
channelSliced = true;
4421+
4422+
if (simdSize == 8 || simdSize == 16)
4423+
{
4424+
// SIMD8/16 kernels are not sliced
4425+
channelSliced = false;
4426+
return;
4427+
}
4428+
4429+
for (auto bb : fg)
4430+
{
4431+
for (auto inst : bb->getInstList())
4432+
{
4433+
if (inst->isPseudoKill() || inst->isWriteEnableInst())
4434+
continue;
4435+
4436+
if (inst->isSend())
4437+
{
4438+
auto dst = inst->getDst();
4439+
if (dst && dst->isDstRegRegion())
4440+
skipSendDcls.insert(dst->getTopDcl());
4441+
4442+
auto src = inst->getSrc(0);
4443+
if (src && src->isSrcRegRegion())
4444+
skipSendDcls.insert(src->getTopDcl());
4445+
4446+
src = inst->getSrc(1);
4447+
if (src && src->isSrcRegRegion())
4448+
skipSendDcls.insert(src->getTopDcl());
4449+
}
4450+
}
4451+
}
4452+
4453+
// .dcl V1 size = 128 bytes
4454+
// op (16|M0) V1(0,0) ..
4455+
// op (16|M16) V1(2,0) ..
4456+
// For above sequence, return 32. Instruction
4457+
// is broken in to 2 only due to hw restriction.
4458+
// Allocation of dcl is still as if it were a
4459+
// SIMD32 kernel.
4460+
4461+
// dcl -> lb, rb, emask offset
4462+
std::unordered_map<G4_Declare*, std::vector<std::tuple<unsigned int, unsigned int, unsigned int>>> defaultDefs;
4463+
for (auto bb : fg)
4464+
{
4465+
for (auto inst : bb->getInstList())
4466+
{
4467+
auto dst = inst->getDst();
4468+
if (!dst || !dst->isDstRegRegion() || !dst->getTopDcl() ||
4469+
skipSendDcls.find(dst->getTopDcl()) != skipSendDcls.end() ||
4470+
dst->asDstRegRegion()->getHorzStride() != 1)
4471+
continue;
4472+
4473+
auto regFileKind = dst->getTopDcl()->getRegFile();
4474+
if (regFileKind != G4_RegFileKind::G4_GRF && regFileKind != G4_RegFileKind::G4_INPUT)
4475+
continue;
4476+
4477+
auto dstElemSize = G4_Type_Table[dst->getType()].byteSize;
4478+
4479+
if (dst->getTopDcl()->getByteSize() <= dstElemSize * simdSize)
4480+
continue;
4481+
4482+
std::vector<std::tuple<unsigned int, unsigned int, unsigned int>> v =
4483+
{ std::make_tuple(dst->getLeftBound(), dst->getRightBound(), inst->getMaskOffset()) };
4484+
defaultDefs.insert(std::make_pair(dst->getTopDcl(), v));
4485+
}
4486+
}
4487+
4488+
for (auto dd : defaultDefs)
4489+
{
4490+
auto elemSize = dd.first->getElemSize();
4491+
for (auto defs : dd.second)
4492+
{
4493+
auto lb = std::get<0>(defs);
4494+
auto rb = std::get<1>(defs);
4495+
auto emaskOffset = std::get<2>(defs);
4496+
4497+
// Look for single instruction
4498+
if (emaskOffset == 0 && lb == 0 && rb == elemSize * 32)
4499+
channelSliced = false;
4500+
// Or broken instruction
4501+
if (emaskOffset == 16 && lb == elemSize * 16 && rb == elemSize * 32)
4502+
channelSliced = false;
4503+
}
4504+
4505+
if (!channelSliced)
4506+
break;
4507+
}
4508+
4509+
return;
4510+
}
4511+
44164512
void G4_Kernel::calculateSimdSize()
44174513
{
44184514
// Iterate over all instructions in kernel to check
@@ -4440,15 +4536,19 @@ void G4_Kernel::calculateSimdSize()
44404536
if (size > 16)
44414537
{
44424538
simdSize = 32;
4443-
return;
4539+
break;
44444540
}
44454541
else if (size > 8)
44464542
{
44474543
simdSize = 16;
44484544
}
44494545
}
44504546
}
4547+
if (simdSize == 32)
4548+
break;
44514549
}
4550+
4551+
computeChannelSlicing();
44524552
}
44534553

44544554
void G4_Kernel::dump() const

visa/FlowGraph.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1297,6 +1297,7 @@ class G4_Kernel
12971297
const char* name;
12981298
unsigned numRegTotal;
12991299
unsigned int simdSize;
1300+
bool channelSliced = true;
13001301
bool hasAddrTaken;
13011302
Options *m_options;
13021303

@@ -1440,8 +1441,11 @@ class G4_Kernel
14401441

14411442
Options *getOptions(){ return m_options; }
14421443
bool getOption(vISAOptions opt) const { return m_options->getOption(opt); }
1444+
void computeChannelSlicing();
14431445
void calculateSimdSize();
14441446
unsigned int getSimdSize() { return simdSize; }
1447+
bool getChannelSlicing() { return channelSliced; }
1448+
unsigned int getSimdSizeWithSlicing() { return channelSliced ? simdSize/2 : simdSize; }
14451449

14461450
void setHasAddrTaken(bool val) { hasAddrTaken = val; }
14471451
bool getHasAddrTaken() { return hasAddrTaken; }

visa/GraphColor.cpp

Lines changed: 72 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2228,34 +2228,48 @@ void GlobalRA::updateSubRegAlignment(G4_SubReg_Align subAlign)
22282228
}
22292229
}
22302230

2231+
// Returns true when dcl must be even-GRF aligned.
//
// Even alignment is needed when, for the kernel SIMD size (halved when the
// kernel is channel sliced) and the element size implied by the root
// declare, one complete def occupies more than 1 GRF and at most 2 GRFs.
// Larger sizes are handled with weak edges instead. Root declares that are
// all-NoMask, partial, or carry a NonDefault augmentation mask never need
// it, and neither does builtin R0 when preemption is enabled.
bool GlobalRA::evenAlignNeeded(G4_Declare* dcl)
{
    const unsigned int slicedSimdSize = kernel.getSimdSizeWithSlicing();
    G4_Declare* rootDcl = dcl->getRootDeclare();
    auto rootMask = getAugmentationMask(rootDcl);

    if (areAllDefsNoMask(rootDcl) || rootDcl->getIsPartialDcl() ||
        rootMask == AugmentationMasks::NonDefault)
    {
        return false;
    }

    // Widen the element size to what the augmentation mask implies
    auto elemBytes = rootDcl->getElemSize();
    if (rootMask == AugmentationMasks::Default32Bit && elemBytes < 4)
    {
        // :uw with hstride 2 can also be Default32Bit and hence needs even alignment
        elemBytes = 4;
    }
    else if (rootMask == AugmentationMasks::Default64Bit && elemBytes < 8)
    {
        elemBytes = 8;
    }

    // Even align if size is between 1-2 GRFs, for >2GRF sizes use weak edges
    const auto defBytes = elemBytes * slicedSimdSize;
    const bool spansOneToTwoGRFs =
        defBytes > (unsigned int)GENX_GRF_REG_SIZ &&
        defBytes <= (unsigned int)(2 * GENX_GRF_REG_SIZ);
    if (!spansOneToTwoGRFs)
    {
        return false;
    }

    const bool isPreemptionR0 =
        kernel.fg.builder->getOption(vISA_enablePreemption) &&
        dcl == kernel.fg.builder->getBuiltinR0();
    return !isPreemptionR0;
}
2261+
22312262
// This function can be invoked before local RA or after augmentation.
2232-
// When invoked before local RA, it sets all vars to be Even aligned,
2233-
// including NoMask ones. This is safe, but conservative. Post
2234-
// augmentation, dcl masks are available so only non-NoMask vars will
2235-
// be Even aligned. Others will be Either aligned. There is no need
2236-
// to store old value of align because HW has no restriction on
2237-
// even/odd alignment that HW conformity computes.
22382263
void GlobalRA::evenAlign()
22392264
{
22402265
// Update alignment of all GRF declares to align
22412266
for (auto dcl : kernel.Declares)
22422267
{
22432268
if (dcl->getRegFile() & G4_GRF)
22442269
{
2245-
G4_Declare* topdcl = dcl->getRootDeclare();
2246-
auto topdclAugMask = getAugmentationMask(topdcl);
2247-
2248-
if (!areAllDefsNoMask(topdcl) && !topdcl->getIsPartialDcl() &&
2249-
topdclAugMask != AugmentationMasks::NonDefault &&
2250-
topdclAugMask != AugmentationMasks::Default64Bit)
2270+
if (evenAlignNeeded(dcl))
22512271
{
2252-
if ((topdcl->getElemSize() >= 4 || topdclAugMask == AugmentationMasks::Default32Bit) &&
2253-
topdcl->getByteSize() >= GENX_GRF_REG_SIZ &&
2254-
!(kernel.fg.builder->getOption(vISA_enablePreemption) &&
2255-
dcl == kernel.fg.builder->getBuiltinR0()))
2256-
{
2257-
setEvenAligned(dcl, true);
2258-
}
2272+
setEvenAligned(dcl, true);
22592273
}
22602274
}
22612275
}
@@ -3113,9 +3127,7 @@ bool Augmentation::markNonDefaultMaskDef()
31133127
prevAugMask = gra.getAugmentationMask(dcl);
31143128
}
31153129

3116-
if (liveAnalysis.livenessClass(G4_GRF) &&
3117-
gra.getAugmentationMask(dcl) == AugmentationMasks::Default32Bit &&
3118-
kernel.getSimdSize() > NUM_DWORDS_PER_GRF)
3130+
if (gra.evenAlignNeeded(dcl))
31193131
{
31203132
auto dclLR = gra.getLocalLR(dcl);
31213133
if (dclLR)
@@ -3124,7 +3136,7 @@ bool Augmentation::markNonDefaultMaskDef()
31243136
auto phyReg = dclLR->getPhyReg(s);
31253137
if (phyReg && phyReg->asGreg()->getRegNum() % 2 != 0)
31263138
{
3127-
// If LRA assignment is not 2GRF aligned for SIMD16 then
3139+
// If LRA assignment is not 2GRF aligned then
31283140
// mark it as non-default. GRA candidates cannot fully
31293141
// overlap with such ranges. Partial overlap is illegal.
31303142
gra.setAugmentationMask(dcl, AugmentationMasks::NonDefault);
@@ -4166,6 +4178,22 @@ bool Interference::isStrongEdgeBetween(G4_Declare* dcl1, G4_Declare* dcl2)
41664178
return false;
41674179
}
41684180

4181+
// Returns true when variables with augmentation mask m need weak edges,
// i.e. when one def of the mask's element type (Type_Q for Default64Bit,
// Type_D for Default32Bit) across the kernel SIMD size with slicing would
// exceed 2 GRFs. Sizes up to 2 GRFs are handled by even alignment. Any
// other mask never needs weak edges.
bool Augmentation::weakEdgeNeeded(AugmentationMasks m)
{
    const bool is64Bit = (m == AugmentationMasks::Default64Bit);
    const bool is32Bit = (m == AugmentationMasks::Default32Bit);
    if (!is64Bit && !is32Bit)
    {
        return false;
    }

    // Weak edge needed in case #GRF exceeds 2
    const auto defBytes =
        G4_Type_Table[is64Bit ? Type_Q : Type_D].byteSize * kernel.getSimdSizeWithSlicing();
    return defBytes > (unsigned int)(2 * GENX_GRF_REG_SIZ);
}
4196+
41694197
//
41704198
// Mark interference between newDcl and other incompatible dcls in current active lists.
41714199
//
@@ -4183,10 +4211,8 @@ void Augmentation::buildSIMDIntfDcl(G4_Declare* newDcl, bool isCall)
41834211
{
41844212
if (liveAnalysis.livenessClass(G4_GRF) &&
41854213
// Populate compatible sparse intf data structure
4186-
// only for 64-bit bit types since others can be
4187-
// handled using Even align.
4188-
gra.getAugmentationMask(defaultDcl) == AugmentationMasks::Default64Bit &&
4189-
newDclAugMask == AugmentationMasks::Default64Bit)
4214+
// only for weak edges.
4215+
weakEdgeNeeded(newDclAugMask))
41904216
{
41914217
if (defaultDcl->getRegVar()->isPhyRegAssigned() &&
41924218
newDcl->getRegVar()->isPhyRegAssigned())
@@ -4402,7 +4428,7 @@ void Augmentation::augmentIntfGraph()
44024428

44034429
if (liveAnalysis.livenessClass(G4_GRF))
44044430
{
4405-
if (kernel.getSimdSize() > NUM_DWORDS_PER_GRF)
4431+
if (kernel.getSimdSize() >= NUM_DWORDS_PER_GRF)
44064432
{
44074433
// Set alignment of all GRF candidates
44084434
// to 2GRF except for NoMask variables
@@ -10544,9 +10570,9 @@ void VerifyAugmentation::verifyAlign(G4_Declare* dcl)
1054410570
if (it == masks.end())
1054510571
return;
1054610572

10547-
auto dclMask = std::get<1>((*it).second);
10548-
10549-
if (dclMask == AugmentationMasks::Default32Bit)
10573+
if (dcl->getByteSize() >= NUM_DWORDS_PER_GRF * G4_Type_Table[Type_UD].byteSize &&
10574+
dcl->getByteSize() <= 2 * NUM_DWORDS_PER_GRF * G4_Type_Table[Type_UD].byteSize &&
10575+
kernel->getSimdSize() > NUM_DWORDS_PER_GRF)
1055010576
{
1055110577
auto assignment = dcl->getRegVar()->getPhyReg();
1055210578
if (assignment && assignment->isGreg())
@@ -10642,6 +10668,14 @@ void VerifyAugmentation::labelBBs()
1064210668
#endif
1064310669
}
1064410670

10671+
// Returns the byte offset of dcl's physical assignment, computed as
// (GRF number * G4_GRF_REG_NBYTES) + (sub-register offset * element size).
// The physical register is dereferenced without a null check, so the caller
// must only pass declares whose reg var has a GRF assigned.
unsigned int getGRFBaseOffset(G4_Declare* dcl)
{
    auto regVar = dcl->getRegVar();
    const unsigned int grfNum = regVar->getPhyReg()->asGreg()->getRegNum();
    const unsigned int subRegOff = regVar->getPhyRegOff();
    const auto elemBytes = getTypeSize(dcl->getElemType());
    return (grfNum * G4_GRF_REG_NBYTES) + (subRegOff * elemBytes);
}
10678+
1064510679
bool VerifyAugmentation::interfereBetween(G4_Declare* dcl1, G4_Declare* dcl2)
1064610680
{
1064710681
bool interferes = true;
@@ -10710,8 +10744,8 @@ bool VerifyAugmentation::interfereBetween(G4_Declare* dcl1, G4_Declare* dcl2)
1071010744

1071110745
if (lr1->getAssigned() && lr2->getAssigned())
1071210746
{
10713-
auto preg1Start = dcl1->getGRFBaseOffset();
10714-
auto preg2Start = dcl2->getGRFBaseOffset();
10747+
auto preg1Start = getGRFBaseOffset(dcl1);
10748+
auto preg2Start = getGRFBaseOffset(dcl2);
1071510749
auto preg1End = preg1Start + dcl1->getByteSize();
1071610750
auto preg2End = preg2Start + dcl2->getByteSize();
1071710751

@@ -10756,8 +10790,8 @@ void VerifyAugmentation::verify()
1075610790
{
1075710791
if (dcl1->getRegFile() == G4_RegFileKind::G4_GRF && dcl2->getRegFile() == G4_RegFileKind::G4_GRF)
1075810792
{
10759-
auto preg1Start = dcl1->getGRFBaseOffset();
10760-
auto preg2Start = dcl2->getGRFBaseOffset();
10793+
auto preg1Start = getGRFBaseOffset(dcl1);
10794+
auto preg2Start = getGRFBaseOffset(dcl2);
1076110795
auto preg1End = preg1Start + dcl1->getByteSize();
1076210796
auto preg2End = preg2Start + dcl2->getByteSize();
1076310797

@@ -10823,6 +10857,9 @@ void VerifyAugmentation::verify()
1082310857
{
1082410858
bool interfere = interfereBetween(activeDcl, dcl);
1082510859

10860+
if (activeDcl->getIsPartialDcl() || dcl->getIsPartialDcl())
10861+
continue;
10862+
1082610863
if (!interfere)
1082710864
{
1082810865
std::cerr << dcl->getRegVar()->getName() << "(" << getStr(dclMask) << ") and " << activeDcl->getRegVar()->getName() << "(" <<

visa/GraphColor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,7 @@ namespace vISA
310310
void buildSIMDIntfDcl(G4_Declare* newDcl, bool isCall);
311311
void buildSIMDIntfAll(G4_Declare* newDcl);
312312
void handleSIMDIntf(G4_Declare* firstDcl, G4_Declare* secondDcl, bool isCall);
313+
bool weakEdgeNeeded(AugmentationMasks);
313314

314315
public:
315316
Augmentation(G4_Kernel& k, Interference& i, LivenessAnalysis& l, LiveRange* ranges[], GlobalRA& g);
@@ -1198,7 +1199,9 @@ namespace vISA
11981199
static uint32_t getRefCount(int loopNestLevel);
11991200
bool isReRAPass();
12001201
void updateSubRegAlignment(G4_SubReg_Align subAlign);
1202+
bool isChannelSliced();
12011203
void evenAlign();
1204+
bool evenAlignNeeded(G4_Declare*);
12021205
void getBankAlignment(LiveRange* lr, BankAlign &align);
12031206
void printLiveIntervals();
12041207
void reportUndefinedUses(LivenessAnalysis& liveAnalysis, G4_BB* bb, G4_INST* inst, G4_Declare* referencedDcl, std::set<G4_Declare*>& defs, std::ofstream& optreport, Gen4_Operand_Number opndNum);

visa/LocalRA.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ void LocalRA::evenAlign()
130130
{
131131
if (kernel.getOptions()->getTarget() == VISA_3D && kernel.fg.size() > 2)
132132
{
133-
if (kernel.getSimdSize() >= 16)
133+
if (kernel.getSimdSize() >= NUM_DWORDS_PER_GRF)
134134
{
135135
// Set alignment of all GRF candidates
136136
// to 2GRF except for NoMask variables

0 commit comments

Comments
 (0)