Skip to content

Commit ca90db0

Browse files
weiyu-chen authored and paigeale committed
Refactor operand bit-vector computation, second try
Change-Id: I1c637d84ea413a99649940966777adc5b99f2c2d
1 parent 7697941 commit ca90db0

File tree

6 files changed

+134
-454
lines changed

6 files changed

+134
-454
lines changed

visa/Gen4_IR.cpp

Lines changed: 110 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -4723,22 +4723,18 @@ void G4_DstRegRegion::setDstBitVec( uint8_t exec_size )
47234723
unsigned short s_size = horzStride * type_size;
47244724

47254725
// General cases.
4726-
unsigned short bit_seq = G4_Type_Table[type].footprint;
4726+
uint64_t bit_seq = G4_Type_Table[type].footprint;
47274727
for (uint8_t i = 0; i < exec_size; ++i)
47284728
{
47294729
int eltOffset = i * s_size;
4730-
if (eltOffset >= getGRFSize())
4730+
// no element can cross 64-byte boundary
4731+
if (eltOffset >= 64)
47314732
{
4732-
footprint1 |= ((uint64_t)bit_seq) << (eltOffset - getGRFSize());
4733+
footprint1 |= bit_seq << (eltOffset - 64);
47334734
}
4734-
else if (eltOffset + G4_Type_Table[type].byteSize < getGRFSize())
4735+
else
47354736
{
4736-
footprint0 |= ((uint64_t)bit_seq) << eltOffset;
4737-
}
4738-
else
4739-
{
4740-
footprint0 |= ((uint64_t)bit_seq) << eltOffset; // 4 + 31 --> 1 BIT MASKED
4741-
footprint1 |= ((uint64_t)bit_seq) >> (getGRFSize() - eltOffset); // 32 - 31 = 1, keep 3
4737+
footprint0 |= bit_seq << eltOffset;
47424738
}
47434739
}
47444740

@@ -4945,67 +4941,33 @@ static G4_CmpRelation compareRegRegionToOperand(G4_Operand* regRegion, G4_Operan
49454941
return Rel_interfere;
49464942
}
49474943

4948-
// Now both operands are within two GRFs, comparing their L/H vectors
4949-
// to get more precise relations
4950-
int dist = left_bound2 - myLeftBound;
4951-
uint64_t new_bitVecL = myBitVecL, new_bitVecH = myBitVecH;
4952-
if (dist > 0 && dist < (2 * GENX_GRF_REG_SIZ))
4953-
{
4954-
if (dist >= GENX_GRF_REG_SIZ)
4955-
{
4956-
uint64_t lbit = new_bitVecH >> (dist - GENX_GRF_REG_SIZ);
4957-
new_bitVecL = lbit;
4958-
new_bitVecH = 0;
4959-
}
4960-
else
4961-
{
4962-
new_bitVecL >>= dist;
4963-
uint64_t lbit = new_bitVecH << (GENX_GRF_REG_SIZ - dist);
4964-
new_bitVecL |= lbit;
4965-
new_bitVecH >>= dist;
4966-
}
4967-
}
4968-
else if (dist < 0 && dist > (-2 * GENX_GRF_REG_SIZ))
4944+
// Now both operands are within two GRFs, compare their footprint to get precise relations
4945+
int maskSize = 2 * getGRFSize();
4946+
if (myDcl)
49694947
{
4970-
dist = abs(dist);
4971-
if (dist >= GENX_GRF_REG_SIZ)
4972-
{
4973-
uint64_t lbit = opndBitVecH >> (dist - GENX_GRF_REG_SIZ);
4974-
opndBitVecL = lbit;
4975-
opndBitVecH = 0;
4976-
}
4977-
else
4978-
{
4979-
opndBitVecL >>= dist;
4980-
uint64_t lbit = opndBitVecH << (GENX_GRF_REG_SIZ - dist);
4981-
opndBitVecL |= lbit;
4982-
opndBitVecH >>= dist;
4983-
}
4948+
maskSize = myDcl->getRegVar()->isFlag() ? myDcl->getNumberFlagElements()
4949+
: myDcl->getByteSize();
49844950
}
4985-
uint64_t commonL = new_bitVecL & opndBitVecL, commonH = new_bitVecH & opndBitVecH;
4951+
BitSet myBitSet(maskSize, false);
4952+
BitSet otherBitSet(maskSize, false);
4953+
regRegion->updateFootPrint(myBitSet, true);
4954+
opnd->updateFootPrint(otherBitSet, true);
49864955

4987-
if (myLeftBound <= left_bound2 &&
4988-
myRightBound >= right_bound2 &&
4989-
commonL == opndBitVecL &&
4990-
commonH == opndBitVecH)
4991-
{
4992-
return Rel_gt;
4993-
}
4994-
else if (myLeftBound >= left_bound2 &&
4995-
myRightBound <= right_bound2 &&
4996-
commonL == new_bitVecL &&
4997-
commonH == new_bitVecH)
4998-
{
4999-
return Rel_lt;
5000-
}
5001-
else if (dist < (2*GENX_GRF_REG_SIZ) && dist > (-2 * GENX_GRF_REG_SIZ) && commonL == 0 && commonH == 0)
4956+
BitSet tmp = myBitSet;
4957+
myBitSet &= otherBitSet;
4958+
if (myBitSet.isEmpty())
50024959
{
50034960
return Rel_disjoint;
50044961
}
5005-
else
4962+
4963+
myBitSet = tmp;
4964+
myBitSet -= otherBitSet;
4965+
if (myBitSet.isEmpty())
50064966
{
5007-
return Rel_interfere;
4967+
return Rel_lt;
50084968
}
4969+
otherBitSet -= tmp;
4970+
return otherBitSet.isEmpty() ? Rel_gt : Rel_interfere;
50094971
}
50104972
}
50114973
}
@@ -5974,8 +5936,8 @@ void G4_Predicate::splitPred( )
59745936

59755937
bitVec[0] = ((uint32_t)getBitVecL()) >> shiftLen;
59765938
}
5977-
void
5978-
G4_CondMod::emit(std::ostream& output, bool symbolreg)
5939+
5940+
void G4_CondMod::emit(std::ostream& output, bool symbolreg)
59795941
{
59805942
output << '.' << CondModStr[mod];
59815943
output << '.';
@@ -6191,53 +6153,6 @@ G4_Imm::emitAutoFmt(std::ostream& output)
61916153
}
61926154
}
61936155

6194-
int64_t G4_Imm::typecastVals(int64_t value, G4_Type type)
6195-
{
6196-
int64_t retVal = 0;
6197-
switch (type)
6198-
{
6199-
case Type_UD:
6200-
case Type_UV:
6201-
case Type_VF:
6202-
{
6203-
retVal = (int64_t)((unsigned int)value);
6204-
break;
6205-
}
6206-
case Type_D:
6207-
case Type_V:
6208-
{
6209-
retVal = (int64_t)((int)value);
6210-
break;
6211-
}
6212-
case Type_UW:
6213-
{
6214-
retVal = (int64_t)((uint16_t)value);
6215-
break;
6216-
}
6217-
case Type_W:
6218-
{
6219-
retVal = (int64_t)((int16_t)value);
6220-
break;
6221-
}
6222-
case Type_UB:
6223-
{
6224-
retVal = (int64_t)((uint8_t)value);
6225-
break;
6226-
}
6227-
case Type_B:
6228-
{
6229-
retVal = (int64_t)((int8_t)value);
6230-
break;
6231-
}
6232-
default:
6233-
{
6234-
// Dont do float conversions
6235-
retVal = value;
6236-
}
6237-
}
6238-
return retVal;
6239-
}
6240-
62416156
G4_RegVar *
62426157
G4_RegVarTransient::getNonTransientBaseRegVar ()
62436158
{
@@ -6411,7 +6326,7 @@ void G4_SrcRegRegion::computeLeftBound()
64116326

64126327
void G4_SrcRegRegion::setSrcBitVec(uint8_t exec_size)
64136328
{
6414-
unsigned short bit_seq = G4_Type_Table[type].footprint;
6329+
uint64_t bit_seq = G4_Type_Table[type].footprint;
64156330
unsigned short typeSize = (unsigned short)G4_Type_Table[type].byteSize;
64166331

64176332
uint64_t footPrint0 = 0;
@@ -6424,24 +6339,14 @@ void G4_SrcRegRegion::setSrcBitVec(uint8_t exec_size)
64246339
}
64256340
else if (desc->isContiguous(exec_size))
64266341
{
6342+
// fast path
64276343
int totalBytes = exec_size * typeSize;
6428-
MUST_BE_TRUE(totalBytes <= 2 * getGRFSize(), "total bits exceeds 2 GRFs");
6429-
if (totalBytes == getGRFSize() * 2)
6430-
{
6431-
footPrint0 = ULLONG_MAX;
6432-
footPrint1 = ULLONG_MAX;
6433-
}
6434-
else
6435-
{
6436-
if (totalBytes <= getGRFSize())
6437-
{
6438-
footPrint0 = (1ULL << totalBytes) - 1;
6439-
}
6440-
else
6441-
{
6442-
footPrint0 = ULLONG_MAX;
6443-
footPrint1 = (1ULL << (totalBytes - getGRFSize())) - 1;
6444-
}
6344+
MUST_BE_TRUE(totalBytes <= 2 * getGRFSize(), "total bytes exceed 2 GRFs");
6345+
6346+
footPrint0 = totalBytes < 64 ? (1ULL << totalBytes) - 1 : ULLONG_MAX;
6347+
if (totalBytes > 64)
6348+
{
6349+
footPrint1 = totalBytes == 128 ? ULLONG_MAX : (1ULL << (totalBytes - 64)) - 1;
64456350
}
64466351
}
64476352
else
@@ -6451,28 +6356,21 @@ void G4_SrcRegRegion::setSrcBitVec(uint8_t exec_size)
64516356
for (int j = 0; j < desc->width; ++j)
64526357
{
64536358
int eltOffset = i * desc->vertStride * typeSize + j * desc->horzStride * typeSize;
6454-
6455-
if (eltOffset >= getGRFSize())
6456-
{
6457-
footPrint1 |= ((uint64_t)bit_seq) << (eltOffset - getGRFSize());
6458-
}
6459-
else if (eltOffset + G4_Type_Table[type].byteSize < getGRFSize())
6359+
// no element can cross 64-byte boundary
6360+
if (eltOffset >= 64)
64606361
{
6461-
footPrint0 |= ((uint64_t)bit_seq) << eltOffset;
6362+
footPrint1 |= bit_seq << (eltOffset - 64);
64626363
}
64636364
else
64646365
{
6465-
footPrint0 |= ((uint64_t)bit_seq) << eltOffset;
6466-
footPrint1 |= ((uint64_t)bit_seq) >> (getGRFSize() - eltOffset);
6366+
footPrint0 |= bit_seq << eltOffset;
64676367
}
64686368
}
64696369
}
64706370
}
64716371

64726372
bitVec[0] = footPrint0;
64736373
bitVec[1] = footPrint1;
6474-
6475-
return;
64766374
}
64776375

64786376
unsigned G4_SrcRegRegion::computeRightBound( uint8_t exec_size )
@@ -6905,6 +6803,76 @@ void G4_INST::computeLeftBoundForImplAcc(G4_Operand* opnd)
69056803
}
69066804
}
69076805

6806+
//
6807+
// Normalize an operand's bitvec footprint based on its left bound
6808+
// and update the given bitset.
6809+
// If isSet is true, we set all bits that are covered by this operand.
6810+
// If isSet os false, we clear all bits that are covered by this operand.
6811+
//
6812+
void G4_Operand::updateFootPrint(BitSet& footprint, bool isSet)
6813+
{
6814+
unsigned N = NUM_BITS_PER_ELT;
6815+
unsigned lb = getLeftBound();
6816+
unsigned rb = getRightBound();
6817+
const bool doFastPath = true; // for debugging
6818+
6819+
if (doFastPath && lb % N == 0 && (rb + 1) % N == 0)
6820+
{
6821+
// lb is 32-byte aligned, set one dword at a time
6822+
unsigned idx = lb / N;
6823+
unsigned endIdx = rb / N;
6824+
// get the precise footprint for the first two GRF
6825+
for (int i = 0; i < 2 && idx <= endIdx; ++i, ++idx)
6826+
{
6827+
uint64_t bits = getBitVecL();
6828+
uint32_t bitVal = (uint32_t)(i % 2 ? bits >> N : bits);
6829+
if (isSet)
6830+
{
6831+
footprint.setElt(idx, bitVal);
6832+
}
6833+
else
6834+
{
6835+
footprint.resetElt(idx, bitVal);
6836+
}
6837+
}
6838+
6839+
// beyond the first two GRF we assume every byte is touched
6840+
while (idx <= endIdx)
6841+
{
6842+
if (isSet)
6843+
{
6844+
footprint.setElt(idx, 0xFFFFFFFF);
6845+
}
6846+
else
6847+
{
6848+
footprint.resetElt(idx, 0xFFFFFFFF);
6849+
}
6850+
idx++;
6851+
}
6852+
}
6853+
else
6854+
{
6855+
// handle unaligned case
6856+
uint64_t mask0 = getBitVecL();
6857+
unsigned j = lb;
6858+
for (unsigned i = 0; i < 64 && j <= rb; ++i, ++j)
6859+
{
6860+
if (mask0 & (1ULL << i))
6861+
footprint.set(j, isSet);
6862+
}
6863+
while (j++ <= rb)
6864+
footprint.set(j, isSet);
6865+
}
6866+
}
6867+
6868+
// update bit vector for this operand based on it size
6869+
// We assume all bytes are touched
6870+
void G4_Operand::setBitVecFromSize(uint32_t NBytes)
6871+
{
6872+
bitVec[0] = NBytes < 64 ? (1ULL << NBytes) - 1 : ULLONG_MAX;
6873+
bitVec[1] = 0;
6874+
}
6875+
69086876
// Left and right bound for every operand is based off
69096877
// top most dcl.
69106878
// For flag register as dst/src/pred/cond mod, each bit of
@@ -7019,23 +6987,7 @@ void G4_InstSend::computeRightBound(G4_Operand* opnd)
70196987
LB + numReg * G4_GRF_REG_NBYTES) - 1;
70206988

70216989
unsigned NBytes = RB - LB + 1;
7022-
if (NBytes <= 32)
7023-
{
7024-
uint32_t Mask = uint32_t(-1) >> (32 - NBytes);
7025-
opnd->setBitVecL(Mask);
7026-
}
7027-
else if (NBytes <= 64)
7028-
{
7029-
opnd->setBitVecL(0xFFFFFFFF);
7030-
uint32_t Mask = uint32_t(-1) >> (64 - NBytes);
7031-
opnd->setBitVecH(Mask);
7032-
}
7033-
else
7034-
{
7035-
// NBytes > 64
7036-
opnd->setBitVecL(0xFFFFFFFF);
7037-
opnd->setBitVecH(0xFFFFFFFF);
7038-
}
6990+
opnd->setBitVecFromSize(NBytes);
70396991
opnd->setRightBound(RB);
70406992
};
70416993

0 commit comments

Comments
 (0)