Skip to content

Commit cb113bb

Browse files
pratikasharigcbot
authored and committed
Changes in code.
1 parent 469ae7d commit cb113bb

13 files changed

+144
-269
lines changed

visa/G4_Opcode.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,7 @@ enum class BankAlign {
6262
Odd = 3, // odd align
6363
Even2GRF = 4, // 2-GRF even align 1100
6464
Odd2GRF = 5, // 2-GRF odd align, 0011
65-
QuadGRF = 6, // 4-GRF align
66-
Align_NUM = 7 // Num of alignment
65+
Align_NUM = 6 // Num of alignment
6766
};
6867

6968
// An instruction's execution width

visa/GraphColor.cpp

Lines changed: 30 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -2650,12 +2650,9 @@ void GlobalRA::updateSubRegAlignment(G4_SubReg_Align subAlign) {
26502650
}
26512651
}
26522652

2653-
int GlobalRA::getAlignFromAugBucket(G4_Declare *dcl) {
2653+
bool GlobalRA::evenAlignNeeded(G4_Declare *dcl) {
26542654
if (GlobalRA::useGenericAugAlign(builder.getPlatformGeneration())) {
2655-
// Return 0 if no special alignment is needed
2656-
// Return 2 if even alignment is needed
2657-
// Return 4 if quad alignment is needed
2658-
2655+
// Return true if even alignment is needed
26592656
// Even align needed if for given SIMD size and elem type,
26602657
// a complete def uses between 1-2 GRFs.
26612658
auto kernelSimdSizeToUse = kernel.getSimdSizeWithSlicing();
@@ -2673,41 +2670,14 @@ int GlobalRA::getAlignFromAugBucket(G4_Declare *dcl) {
26732670
topdclAugMask == AugmentationMasks::Default64Bit)
26742671
elemSizeToUse = 8;
26752672

2676-
auto totalByteSize = elemSizeToUse * kernelSimdSizeToUse;
2677-
auto bucketSpans2GRFs = [&]() {
2678-
return totalByteSize > (unsigned)kernel.numEltPerGRF<Type_UB>() &&
2679-
totalByteSize <= (unsigned)(2 * kernel.numEltPerGRF<Type_UB>());
2680-
};
2681-
2682-
if (!(!builder.canReadR0() && dcl == kernel.fg.builder->getBuiltinR0())) {
2683-
if (use4GRFAlign) {
2684-
// The only time it's safe to do 2GRF align is when augmentation
2685-
// bucket is known to be Default32Bit, otherwise we need to align
2686-
// 4GRF. It isn't enough to simply check elemSize * GRF size to
2687-
// decide alignment.
2688-
if (topdclAugMask == AugmentationMasks::Default32Bit) {
2689-
if (bucketSpans2GRFs())
2690-
return 2;
2691-
} else if (topdclAugMask == AugmentationMasks::Default64Bit) {
2692-
if (bucketSpans2GRFs())
2693-
// :df SIMD16
2694-
return 2;
2695-
2696-
// :df SIMD32
2697-
return 4;
2698-
} else {
2699-
// Local RA will take this path as augmentation buckets are set
2700-
// to Undetermined. Although this is conservative, hybrid RA
2701-
// will run augmentation and compute buckets to fill in "holes".
2702-
// For eg, mov (32|M0) V10<2>:f should use 4GRF alignment as
2703-
// it's Default64Bit variable, although elem size is :f.
2704-
return 4;
2705-
}
2706-
} else {
2707-
// Even align if size is between 1-2 GRFs, for >2GRF sizes.
2708-
if (bucketSpans2GRFs())
2709-
return 2;
2710-
}
2673+
if ( // Even align if size is between 1-2 GRFs, for >2GRF sizes use weak
2674+
// edges
2675+
(elemSizeToUse * kernelSimdSizeToUse) >
2676+
(unsigned)kernel.numEltPerGRF<Type_UB>() &&
2677+
(elemSizeToUse * kernelSimdSizeToUse) <=
2678+
(unsigned)(2 * kernel.numEltPerGRF<Type_UB>()) &&
2679+
!(!builder.canReadR0() && dcl == kernel.fg.builder->getBuiltinR0())) {
2680+
return true;
27112681
}
27122682
}
27132683
} else {
@@ -2723,28 +2693,21 @@ int GlobalRA::getAlignFromAugBucket(G4_Declare *dcl) {
27232693
topdcl->getByteSize() >= kernel.numEltPerGRF<Type_UB>() &&
27242694
!(!builder.canReadR0() &&
27252695
dcl == kernel.fg.builder->getBuiltinR0())) {
2726-
return 2;
2696+
return true;
27272697
}
27282698
}
27292699
}
27302700
}
27312701

2732-
return 0;
2702+
return false;
27332703
}
27342704

2735-
void GlobalRA::augAlign() {
2736-
// Update alignment of all GRF declares based on
2737-
// augmentation bucket and platform.
2705+
// This function can be invoked before local RA or after augmentation.
2706+
void GlobalRA::evenAlign() {
2707+
// Update alignment of all GRF declares that need even alignment
27382708
for (auto dcl : kernel.Declares) {
27392709
if (dcl->getRegFile() & G4_GRF) {
2740-
unsigned int align = getAlignFromAugBucket(dcl);
2741-
if (align == 4) {
2742-
if (!isQuadAligned(dcl)) {
2743-
incRA.evenAlignUpdate(dcl);
2744-
}
2745-
forceQuadAlign(dcl);
2746-
}
2747-
else if (align == 2) {
2710+
if (evenAlignNeeded(dcl)) {
27482711
if (!isEvenAligned(dcl)) {
27492712
incRA.evenAlignUpdate(dcl);
27502713
}
@@ -3508,8 +3471,8 @@ bool Augmentation::markNonDefaultMaskDef() {
35083471

35093472
bool checkLRAAlign = false;
35103473
if (liveAnalysis.livenessClass(G4_GRF)) {
3511-
if (GlobalRA::useGenericAugAlign(kernel.getPlatformGeneration()) &&
3512-
gra.getAlignFromAugBucket(dcl) > 0)
3474+
if ((GlobalRA::useGenericAugAlign(kernel.getPlatformGeneration()) &&
3475+
gra.evenAlignNeeded(dcl)))
35133476
checkLRAAlign = true;
35143477
else if (gra.getAugmentationMask(dcl) ==
35153478
AugmentationMasks::Default32Bit &&
@@ -3522,16 +3485,10 @@ bool Augmentation::markNonDefaultMaskDef() {
35223485
if (dclLR) {
35233486
int s;
35243487
auto phyReg = dclLR->getPhyReg(s);
3525-
unsigned int maxAlign = 2;
3526-
if (gra.use4GRFAlign && gra.getAugmentationMask(dcl) == AugmentationMasks::Default64Bit) {
3527-
maxAlign = 4;
3528-
}
3529-
if (phyReg && phyReg->asGreg()->getRegNum() % maxAlign != 0) {
3530-
// If LRA assignment is not aligned as expected then
3488+
if (phyReg && phyReg->asGreg()->getRegNum() % 2 != 0) {
3489+
// If LRA assignment is not 2GRF aligned then
35313490
// mark it as non-default. GRA candidates cannot fully
35323491
// overlap with such ranges. Partial overlap is illegal.
3533-
vISA_ASSERT(!gra.use4GRFAlign,
3534-
"expecting LRA allocation to be 4GRF aligned");
35353492
gra.setAugmentationMask(dcl, AugmentationMasks::NonDefault);
35363493
nonDefaultMaskDefFound = true;
35373494
}
@@ -4238,8 +4195,6 @@ bool Interference::isStrongEdgeBetween(const G4_Declare *dcl1,
42384195

42394196
bool Augmentation::weakEdgeNeeded(AugmentationMasks defaultDclMask,
42404197
AugmentationMasks newDclMask) {
4241-
if (gra.use4GRFAlign)
4242-
return false;
42434198
if (useGenericAugAlign) {
42444199
// Weak edge needed in case #GRF exceeds 2
42454200
if (newDclMask == AugmentationMasks::Default64Bit)
@@ -4791,9 +4746,9 @@ void Augmentation::augmentIntfGraph() {
47914746
// to 2GRF except for NoMask variables
47924747
VISA_DEBUG_VERBOSE(std::cout
47934748
<< "Kernel size is SIMD" << kernel.getSimdSize()
4794-
<< " so updating all GRFs to aug align"
4749+
<< " so updating all GRFs to be 2GRF aligned"
47954750
<< "\n");
4796-
gra.augAlign();
4751+
gra.evenAlign();
47974752
}
47984753
gra.updateSubRegAlignment(kernel.getGRFAlign());
47994754
}
@@ -5099,7 +5054,6 @@ void GraphColor::computeDegreeForGRF() {
50995054
// consider weak edges in degree computation
51005055
auto *weakEdges = intf.getCompatibleSparseIntf(lrs[i]->getDcl());
51015056
if (weakEdges) {
5102-
vISA_ASSERT(!gra.use4GRFAlign, "not expecting weak edges");
51035057
for (auto weakNeighbor : *weakEdges) {
51045058
if (!weakNeighbor->getRegVar()->isRegAllocPartaker())
51055059
continue;
@@ -5413,22 +5367,16 @@ void GraphColor::relaxNeighborDegreeGRF(LiveRange *lr) {
54135367
if (!(lr->getIsPseudoNode()) && !(lr->getIsPartialDcl())) {
54145368
unsigned lr_id = lr->getVar()->getId();
54155369
bool lr2EvenAlign = gra.isEvenAligned(lr->getDcl());
5416-
unsigned int lr2AugAlign = gra.getAugAlign(lr->getDcl());
54175370
unsigned lr2_nreg = lr->getNumRegNeeded();
54185371

54195372
// relax degree between 2 nodes
54205373
auto relaxDegree = [&](LiveRange *lr1) {
54215374
if (lr1->getActive() && !lr1->getIsPseudoNode() &&
54225375
!(lr1->getIsPartialDcl())) {
5376+
bool lr1EvenAlign = gra.isEvenAligned(lr1->getDcl());
54235377
unsigned lr1_nreg = lr1->getNumRegNeeded();
5424-
unsigned w = 0;
5425-
if (gra.use4GRFAlign) {
5426-
unsigned int lr1AugAlign = gra.getAugAlign(lr1->getDcl());
5427-
w = edgeWeightWith4GRF(lr1AugAlign, lr2AugAlign, lr1_nreg, lr2_nreg);
5428-
} else {
5429-
bool lr1EvenAlign = gra.isEvenAligned(lr1->getDcl());
5430-
w = edgeWeightGRF(lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
5431-
}
5378+
unsigned w =
5379+
edgeWeightGRF(lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
54325380
VISA_DEBUG_VERBOSE({
54335381
std::cout << "\t relax ";
54345382
lr1->dump();
@@ -5834,15 +5782,9 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF,
58345782
if (!failed_alloc) {
58355783
// When evenAlignNeeded is true, it is binding for correctness
58365784
bool evenAlignNeeded = gra.isEvenAligned(lrVar->getDeclare());
5837-
bool quadAlignNeeded = gra.isQuadAligned(lrVar->getDeclare());
5838-
BankAlign align = BankAlign::Either;
5839-
if (quadAlignNeeded)
5840-
align = BankAlign::QuadGRF;
5841-
else if (evenAlignNeeded)
5842-
align = BankAlign::Even;
5843-
5785+
BankAlign align = evenAlignNeeded ? BankAlign::Even : BankAlign::Either;
58445786
if (allocFromBanks) {
5845-
vISA_ASSERT(align != BankAlign::QuadGRF, "unexpected value");
5787+
58465788
if (!isHybrid && oneGRFBankDivision &&
58475789
(!evenAlignNeeded ||
58485790
builder.getPlatformGeneration() == PlatformGen::GEN9)) {
@@ -10934,20 +10876,12 @@ void GlobalRA::insertRestoreAddr(G4_BB *bb) {
1093410876
// correctness.
1093510877
//
1093610878
unsigned GraphColor::edgeWeightGRF(const LiveRange *lr1, const LiveRange *lr2) {
10879+
bool lr1EvenAlign = gra.isEvenAligned(lr1->getDcl());
10880+
bool lr2EvenAlign = gra.isEvenAligned(lr2->getDcl());
1093710881
unsigned lr1_nreg = lr1->getNumRegNeeded();
1093810882
unsigned lr2_nreg = lr2->getNumRegNeeded();
1093910883

10940-
if (gra.use4GRFAlign) {
10941-
auto lr1Align = gra.getAugAlign(lr1->getDcl());
10942-
auto lr2Align = gra.getAugAlign(lr2->getDcl());
10943-
10944-
return edgeWeightWith4GRF(lr1Align, lr2Align, lr1_nreg, lr2_nreg);
10945-
} else {
10946-
bool lr1EvenAlign = gra.isEvenAligned(lr1->getDcl());
10947-
bool lr2EvenAlign = gra.isEvenAligned(lr2->getDcl());
10948-
10949-
return edgeWeightGRF(lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
10950-
}
10884+
return edgeWeightGRF(lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
1095110885
}
1095210886

1095310887
unsigned GraphColor::edgeWeightARF(const LiveRange *lr1, const LiveRange *lr2) {

visa/GraphColor.h

Lines changed: 16 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -778,8 +778,6 @@ class Interference {
778778
return nullptr;
779779
}
780780

781-
size_t numVarsWithWeakEdges() const { return compatibleSparseIntf.size(); }
782-
783781
void init() {
784782
if (useDenseMatrix()) {
785783
auto N = (size_t)rowSize * (size_t)maxId;
@@ -897,46 +895,15 @@ class GraphColor {
897895
return lr1_nreg + lr2_nreg - 1;
898896
}
899897

900-
unsigned sum = lr1_nreg + lr2_nreg;
901-
if (!lr2EvenAlign)
898+
if (!lr2EvenAlign) {
899+
unsigned sum = lr1_nreg + lr2_nreg;
902900
return sum + 1 - ((sum) % 2);
903-
904-
return sum - 1 + (lr1_nreg % 2) + (lr2_nreg % 2);
905-
}
906-
907-
static unsigned edgeWeightWith4GRF(int lr1Align, int lr2Align,
908-
unsigned lr1_nreg, unsigned lr2_nreg) {
909-
if (lr1Align < 4 && lr2Align < 4)
910-
return edgeWeightGRF(lr1Align % 2, lr2Align % 2, lr1_nreg, lr2_nreg);
911-
912-
if (lr2Align == 4) {
913-
if (lr1Align < 2)
914-
return lr1_nreg + lr2_nreg - 1;
915-
if (lr1Align == 2) {
916-
// if (lr2_nreg % 2 == 0) -- lr2 size is even
917-
// return lr2_nreg + lr1_nreg;
918-
// if (lr2_nreg % 2 == 1) -- lr2 size is odd
919-
// return lr2_nreg + lr1_nreg + 1;
920-
921-
return lr1_nreg + lr2_nreg + (lr2_nreg % 2);
922-
} else if (lr1Align == 4) {
923-
if (lr2_nreg % 4 == 0)
924-
// lr2 size is multiple of 4
925-
return lr1_nreg + lr2_nreg;
926-
927-
// if lr2_nreg % 4 == 1 -- lr2 size is 1 + (4*n)
928-
// return lr1_nreg + lr2_nreg + 3;
929-
// if lr2_nreg % 2 == 0 -- lr2 size is 2 + (4*n)
930-
// return lr2_nreg + lr1_nreg + 2;
931-
// if lr2_nreg % 4 == 3 -- lr2 size is 3 + (4*n)
932-
// return lr2_nreg + lr1_nreg + 1;
933-
934-
return lr1_nreg + lr2_nreg + 4 - (lr2_nreg % 4);
935-
}
901+
} else if (lr2EvenAlign) {
902+
return lr1_nreg + lr2_nreg - 1 + (lr1_nreg % 2) + (lr2_nreg % 2);
903+
} else {
904+
vISA_ASSERT_UNREACHABLE("should be unreachable");
905+
return 0;
936906
}
937-
938-
vISA_ASSERT(lr1Align == 4, "unexpected condition");
939-
return edgeWeightWith4GRF(lr2Align, lr1Align, lr2_nreg, lr1_nreg);
940907
}
941908

942909
void computeDegreeForGRF();
@@ -1018,7 +985,7 @@ struct RAVarInfo {
1018985
unsigned subOff = 0;
1019986
std::vector<BundleConflict> bundleConflicts;
1020987
G4_SubReg_Align subAlign = G4_SubReg_Align::Any;
1021-
int augAlignInGRF = 0;
988+
bool isEvenAlign = false;
1022989
AugmentationMasks augMask = AugmentationMasks::Undetermined;
1023990
};
1024991

@@ -1143,8 +1110,6 @@ class GlobalRA {
11431110
// The pre assigned forbidden register bits for different kinds
11441111
ForbiddenRegs fbdRegs;
11451112

1146-
const bool use4GRFAlign = false;
1147-
11481113
private:
11491114
template <class REGION_TYPE>
11501115
static unsigned getRegionDisp(REGION_TYPE *region, const IR_Builder &irb);
@@ -1607,35 +1572,12 @@ class GlobalRA {
16071572
return true;
16081573
}
16091574

1610-
bool isQuadAligned(const G4_Declare *dcl) const {
1611-
auto augAlign = getAugAlign(dcl);
1612-
return augAlign == 4;
1613-
}
1614-
1615-
bool isEvenAligned(const G4_Declare* dcl) const {
1616-
auto augAlign = getAugAlign(dcl);
1617-
return augAlign > 0 && augAlign % 2 == 0;
1618-
}
1619-
1620-
int getAugAlign(const G4_Declare *dcl) const {
1621-
return getVar(dcl).augAlignInGRF;
1622-
}
1623-
1624-
void forceQuadAlign(const G4_Declare *dcl) { setAugAlign(dcl, 4); }
1625-
1626-
void resetAlign(const G4_Declare *dcl) { setAugAlign(dcl, 0); }
1627-
1628-
// Due to legacy usage, this method takes a boolean that, when set,
1629-
// causes alignment to be set to Even (2). When boolean flag is
1630-
// reset, it also resets alignment to Either (0).
1631-
void setEvenAligned(const G4_Declare *dcl, bool align) {
1632-
setAugAlign(dcl, align ? 2 : 0);
1575+
bool isEvenAligned(const G4_Declare *dcl) const {
1576+
return getVar(dcl).isEvenAlign;
16331577
}
16341578

1635-
void setAugAlign(const G4_Declare *dcl, int align) {
1636-
vISA_ASSERT(align <= 2 || use4GRFAlign, "unexpected alignment");
1637-
vISA_ASSERT(align <= 4, "unsupported alignment");
1638-
allocVar(dcl).augAlignInGRF = align;
1579+
void setEvenAligned(const G4_Declare *dcl, bool e) {
1580+
allocVar(dcl).isEvenAlign = e;
16391581
}
16401582

16411583
BankAlign getBankAlign(const G4_Declare *) const;
@@ -1650,8 +1592,7 @@ class GlobalRA {
16501592
useLscForNonStackCallSpillFill(
16511593
k.fg.builder->useLscForNonStackSpillFill()),
16521594
useLscForScatterSpill(k.fg.builder->supportsLSC() &&
1653-
k.fg.builder->getOption(vISA_scatterSpill)),
1654-
use4GRFAlign(k.fg.builder->supports4GRFAlign()) {
1595+
k.fg.builder->getOption(vISA_scatterSpill)) {
16551596
vars.resize(k.Declares.size());
16561597

16571598
if (kernel.getOptions()->getOption(vISA_VerifyAugmentation)) {
@@ -1675,9 +1616,8 @@ class GlobalRA {
16751616
static uint32_t getRefCount(int loopNestLevel);
16761617
void updateSubRegAlignment(G4_SubReg_Align subAlign);
16771618
bool isChannelSliced();
1678-
// Used by LRA/GRA/hybrid RA
1679-
void augAlign();
1680-
int getAlignFromAugBucket(G4_Declare *);
1619+
void evenAlign();
1620+
bool evenAlignNeeded(G4_Declare *);
16811621
void getBankAlignment(LiveRange *lr, BankAlign &align);
16821622
void printLiveIntervals();
16831623
void reportUndefinedUses(LivenessAnalysis &liveAnalysis, G4_BB *bb,
@@ -1762,7 +1702,7 @@ class GlobalRA {
17621702
}
17631703

17641704
void copyAlignment(G4_Declare *dst, G4_Declare *src) {
1765-
setAugAlign(dst, getAugAlign(src));
1705+
setEvenAligned(dst, isEvenAligned(src));
17661706
setSubRegAlign(dst, getSubRegAlign(src));
17671707
}
17681708

visa/HWCaps.inc

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -846,9 +846,4 @@ bool hasReadSuppressionOrSharedLocalMemoryWAs() const {
846846
bool supportNativeSIMD32() const {
847847
return false;
848848
}
849-
850-
bool supports4GRFAlign() const {
851-
return false;
852-
}
853-
854849
// end HW capabilities

0 commit comments

Comments
 (0)