Skip to content

Commit 92c136e

Browse files
DianaChenigcbot
authored and committed
IGA: updated dpas macro rules
Updated 2 rules in DpasMacroBuilder: - src2 read suppression can allow DP dpas as long as the rep-count is 4 - WAW and WAR dependencies are allowed between dpas within the same macro
1 parent f5198f4 commit 92c136e

File tree

2 files changed

+37
-25
lines changed

2 files changed

+37
-25
lines changed

visa/iga/IGALibrary/IR/RegDeps.cpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,7 @@ size_t DepSetBuilder::DpasMacroBuilder::getNumberOfSuppresionGroups(uint32_t src
469469
return 0;
470470
}
471471

472+
472473
size_t DepSetBuilder::DpasMacroBuilder::formSrcSuppressionBlock(
473474
InstListIterator startIt, uint32_t srcIdx) {
474475
// get the candidate block
@@ -553,9 +554,10 @@ DepSetBuilder::DpasMacroBuilder::SuppressBlockPtrTy
553554
DepSetBuilder::DpasMacroBuilder::getSuppressionBlockCandidate(
554555
InstListIterator startIt, uint32_t srcIdx,
555556
BitSet<>& allDstBits, BitSet<>& allSrcBits,
556-
BitSet<>& allDstNoLastBits, BitSet<>& allSrcNoLastBits) const {
557+
BitSet<>& allDstNoLastBits, BitSet<>& allSrcNoLastBits,
558+
int forceGroupNum) const {
557559
assert(srcIdx == 1 || srcIdx == 2);
558-
size_t maxGroupNum = getNumberOfSuppresionGroups(srcIdx);
560+
size_t maxGroupNum = forceGroupNum < 0 ? getNumberOfSuppresionGroups(srcIdx) : forceGroupNum;
559561
// return null if the given src can't be suppressed
560562
if (!maxGroupNum)
561563
return nullptr;
@@ -612,13 +614,12 @@ bool DepSetBuilder::DpasMacroBuilder::srcIsSuppressCandidate(const Instruction&
612614
if (srcIdx == 1)
613615
return true;
614616
if (srcIdx == 2) {
615-
// can't be DP dpas
617+
// DP dpas must have rep count 4
616618
if (inst.isDF())
617-
return false;
619+
return GetDpasRepeatCount(inst.getDpasFc()) == 4;
618620

619-
if (GetDpasRepeatCount(inst.getDpasFc()) != 8)
620-
return false;
621-
return true;
621+
// allow only rep count 8 for non-DP dpas
622+
return GetDpasRepeatCount(inst.getDpasFc()) == 8;
622623
}
623624
return false;
624625
}
@@ -631,10 +632,8 @@ bool DepSetBuilder::DpasMacroBuilder::hasProducerConsumerDep(
631632
BitSet<> new_dstbits(m_dsBuilder.getGRF_LEN());
632633
setDstSrcBits(src_range, dst_range, new_srcbits, new_dstbits);
633634

634-
// check if there is WAR/RAW/WAW dependency
635-
if (target_src_bits.intersects(new_dstbits) ||
636-
target_dst_bits.intersects(new_srcbits) ||
637-
target_dst_bits.intersects(new_dstbits))
635+
// check if there is RAW dependency
636+
if (target_dst_bits.intersects(new_srcbits))
638637
return true;
639638
return false;
640639
}
@@ -663,13 +662,16 @@ const Instruction& DepSetBuilder::DpasMacroBuilder::formMacro(size_t& dpasCnt) {
663662
m_inps.getDpasDstDependency(**cur, dst_range);
664663
InstListIterator next = cur;
665664
next++;
666-
// early exit if there is no following instructions
667-
if (next == m_instList.end()) {
665+
// early exit if there are no following instructions or the dpas depth is not 8
666+
if (next == m_instList.end() || GetDpasSystolicDepth((*cur)->getDpasFc()) != 8) {
668667
updateRegFootprintsToDepSets(src_range, src_extra_range, dst_range);
669668
return **cur;
670669
}
671670

672-
dpasCnt = std::max(dpasCnt, formSrcSuppressionBlock(m_firstDpasIt, 1));
671+
bool formMacroForSrc1 = false;
672+
673+
if (!formMacroForSrc1)
674+
dpasCnt = std::max(dpasCnt, formSrcSuppressionBlock(m_firstDpasIt, 1));
673675
dpasCnt = std::max(dpasCnt, formSrcSuppressionBlock(m_firstDpasIt, 2));
674676

675677
if (dpasCnt == 1) {

visa/iga/IGALibrary/IR/RegDeps.hpp

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -414,12 +414,18 @@ class DepSetBuilder {
414414
// 1. consecutive DPAS instructions of the same opcode
415415
// 2. same datatype of the same operand across all instructions
416416
// 3. same execution mask across all instructions
417-
// 4. same depth
418-
// 5. has no internal dependency within the instruction
419-
// with an exception that for depth 8 dpas, src0and dst dependency is allowed if they are completely the same
420-
// 6. One of below conditions is met:
421-
// a) src1 read suppression is fulfilled
422-
// b) src2 read suppression is fulfilled
417+
// 4. depth is 8
418+
// 5. has no internal dependency within each instruction
419+
// - with an exception that for depth 8 dpas, src0 and dst dependency is allowed if they are completely the same
420+
// 6. no producer-consumer relationships (RAW) within the macro
421+
// - WAW and WAR are allowed since dpas are in order within the dpas pipe
422+
// 7. For a DPAS 8xN sequence, where N != 8, a macro has to have at least two back-to-back dpas instructions that share the same
423+
// src1, and the sum of the number of either src0 or src2 registers they read has to be 8 or more; after that, the same macro
424+
// can change to another sequence of dpas instructions with a different src1
425+
// (** Note that violating this rule causes functional issue)
426+
// 8. One of below conditions is met:
427+
// a) for src1 read suppression
428+
// b) for src2 read suppression
423429
class DpasMacroBuilder {
424430
public:
425431
DpasMacroBuilder(
@@ -569,18 +575,22 @@ class DepSetBuilder {
569575
// to srcIdx suppression. Return number of instructions found
570576
size_t formSrcSuppressionBlock(InstListIterator startIt, uint32_t srcIdx);
571577

578+
572579
// return the candidate SuppressBlock that fulfills the read suppression requirement
573580
// of the given src index, starting from the given instruction. This block is the first candidate block
574581
// of instructions whose registers can be suppressed. Will need to check whether the following instructions
575582
// have the same registers so that they can actually be suppressed.
576-
// return nullptr if there is no chance to suppress the given src
577-
// allDstBits, allSrcBits - all used grf bits in the return suppressBlock
578-
// allDstNoLastBits, allSrcNoLastBits - all used grf in the return suppressBlock except the
579-
// last instruction's
583+
// * return nullptr if there is no chance to suppress the given src
584+
// * allDstBits, allSrcBits - all used grf bits in the returned suppressBlock
585+
// * allDstNoLastBits, allSrcNoLastBits - all used grf bits in the returned suppressBlock except the
586+
// last instruction's
587+
// * forceGroupNum - force to use the given value as maximum number of suppression groups instead of
588+
// getting it from getNumberOfSuppresionGroups
580589
SuppressBlockPtrTy getSuppressionBlockCandidate(
581590
InstListIterator startIt, uint32_t srcIdx,
582591
BitSet<>& allDstBits, BitSet<>& allSrcBits,
583-
BitSet<>& allDstNoLastBits, BitSet<>& allSrcNoLastBits) const;
592+
BitSet<>& allDstNoLastBits, BitSet<>& allSrcNoLastBits,
593+
int forceGroupNum = -1) const;
584594

585595
bool srcIsSuppressCandidate(const Instruction& inst, uint32_t srcIdx) const;
586596

0 commit comments

Comments
 (0)