@@ -4723,22 +4723,18 @@ void G4_DstRegRegion::setDstBitVec( uint8_t exec_size )
4723
4723
unsigned short s_size = horzStride * type_size;
4724
4724
4725
4725
// General cases.
4726
- unsigned short bit_seq = G4_Type_Table[type].footprint ;
4726
+ uint64_t bit_seq = G4_Type_Table[type].footprint ;
4727
4727
for (uint8_t i = 0 ; i < exec_size; ++i)
4728
4728
{
4729
4729
int eltOffset = i * s_size;
4730
- if (eltOffset >= getGRFSize ())
4730
+ // no element can cross 64-byte boundary
4731
+ if (eltOffset >= 64 )
4731
4732
{
4732
- footprint1 |= (( uint64_t ) bit_seq) << (eltOffset - getGRFSize () );
4733
+ footprint1 |= bit_seq << (eltOffset - 64 );
4733
4734
}
4734
- else if (eltOffset + G4_Type_Table[type]. byteSize < getGRFSize ())
4735
+ else
4735
4736
{
4736
- footprint0 |= ((uint64_t )bit_seq) << eltOffset;
4737
- }
4738
- else
4739
- {
4740
- footprint0 |= ((uint64_t )bit_seq) << eltOffset; // 4 + 31 --> 1 BIT MASKED
4741
- footprint1 |= ((uint64_t )bit_seq) >> (getGRFSize () - eltOffset); // 32 - 31 = 1, keep 3
4737
+ footprint0 |= bit_seq << eltOffset;
4742
4738
}
4743
4739
}
4744
4740
@@ -4945,67 +4941,33 @@ static G4_CmpRelation compareRegRegionToOperand(G4_Operand* regRegion, G4_Operan
4945
4941
return Rel_interfere;
4946
4942
}
4947
4943
4948
- // Now both operands are within two GRFs, comparing their L/H vectors
4949
- // to get more precise relations
4950
- int dist = left_bound2 - myLeftBound;
4951
- uint64_t new_bitVecL = myBitVecL, new_bitVecH = myBitVecH;
4952
- if (dist > 0 && dist < (2 * GENX_GRF_REG_SIZ))
4953
- {
4954
- if (dist >= GENX_GRF_REG_SIZ)
4955
- {
4956
- uint64_t lbit = new_bitVecH >> (dist - GENX_GRF_REG_SIZ);
4957
- new_bitVecL = lbit;
4958
- new_bitVecH = 0 ;
4959
- }
4960
- else
4961
- {
4962
- new_bitVecL >>= dist;
4963
- uint64_t lbit = new_bitVecH << (GENX_GRF_REG_SIZ - dist);
4964
- new_bitVecL |= lbit;
4965
- new_bitVecH >>= dist;
4966
- }
4967
- }
4968
- else if (dist < 0 && dist > (-2 * GENX_GRF_REG_SIZ))
4944
+ // Now both operands are within two GRFs, compare their footprint to get precise relations
4945
+ int maskSize = 2 * getGRFSize ();
4946
+ if (myDcl)
4969
4947
{
4970
- dist = abs (dist);
4971
- if (dist >= GENX_GRF_REG_SIZ)
4972
- {
4973
- uint64_t lbit = opndBitVecH >> (dist - GENX_GRF_REG_SIZ);
4974
- opndBitVecL = lbit;
4975
- opndBitVecH = 0 ;
4976
- }
4977
- else
4978
- {
4979
- opndBitVecL >>= dist;
4980
- uint64_t lbit = opndBitVecH << (GENX_GRF_REG_SIZ - dist);
4981
- opndBitVecL |= lbit;
4982
- opndBitVecH >>= dist;
4983
- }
4948
+ maskSize = myDcl->getRegVar ()->isFlag () ? myDcl->getNumberFlagElements ()
4949
+ : myDcl->getByteSize ();
4984
4950
}
4985
- uint64_t commonL = new_bitVecL & opndBitVecL, commonH = new_bitVecH & opndBitVecH;
4951
+ BitSet myBitSet (maskSize, false );
4952
+ BitSet otherBitSet (maskSize, false );
4953
+ regRegion->updateFootPrint (myBitSet, true );
4954
+ opnd->updateFootPrint (otherBitSet, true );
4986
4955
4987
- if (myLeftBound <= left_bound2 &&
4988
- myRightBound >= right_bound2 &&
4989
- commonL == opndBitVecL &&
4990
- commonH == opndBitVecH)
4991
- {
4992
- return Rel_gt;
4993
- }
4994
- else if (myLeftBound >= left_bound2 &&
4995
- myRightBound <= right_bound2 &&
4996
- commonL == new_bitVecL &&
4997
- commonH == new_bitVecH)
4998
- {
4999
- return Rel_lt;
5000
- }
5001
- else if (dist < (2 *GENX_GRF_REG_SIZ) && dist > (-2 * GENX_GRF_REG_SIZ) && commonL == 0 && commonH == 0 )
4956
+ BitSet tmp = myBitSet;
4957
+ myBitSet &= otherBitSet;
4958
+ if (myBitSet.isEmpty ())
5002
4959
{
5003
4960
return Rel_disjoint;
5004
4961
}
5005
- else
4962
+
4963
+ myBitSet = tmp;
4964
+ myBitSet -= otherBitSet;
4965
+ if (myBitSet.isEmpty ())
5006
4966
{
5007
- return Rel_interfere ;
4967
+ return Rel_lt ;
5008
4968
}
4969
+ otherBitSet -= tmp;
4970
+ return otherBitSet.isEmpty () ? Rel_gt : Rel_interfere;
5009
4971
}
5010
4972
}
5011
4973
}
@@ -5974,8 +5936,8 @@ void G4_Predicate::splitPred( )
5974
5936
5975
5937
bitVec[0 ] = ((uint32_t )getBitVecL ()) >> shiftLen;
5976
5938
}
5977
- void
5978
- G4_CondMod::emit (std::ostream& output, bool symbolreg)
5939
+
5940
+ void G4_CondMod::emit (std::ostream& output, bool symbolreg)
5979
5941
{
5980
5942
output << ' .' << CondModStr[mod];
5981
5943
output << ' .' ;
@@ -6191,53 +6153,6 @@ G4_Imm::emitAutoFmt(std::ostream& output)
6191
6153
}
6192
6154
}
6193
6155
6194
- int64_t G4_Imm::typecastVals (int64_t value, G4_Type type)
6195
- {
6196
- int64_t retVal = 0 ;
6197
- switch (type)
6198
- {
6199
- case Type_UD:
6200
- case Type_UV:
6201
- case Type_VF:
6202
- {
6203
- retVal = (int64_t )((unsigned int )value);
6204
- break ;
6205
- }
6206
- case Type_D:
6207
- case Type_V:
6208
- {
6209
- retVal = (int64_t )((int )value);
6210
- break ;
6211
- }
6212
- case Type_UW:
6213
- {
6214
- retVal = (int64_t )((uint16_t )value);
6215
- break ;
6216
- }
6217
- case Type_W:
6218
- {
6219
- retVal = (int64_t )((int16_t )value);
6220
- break ;
6221
- }
6222
- case Type_UB:
6223
- {
6224
- retVal = (int64_t )((uint8_t )value);
6225
- break ;
6226
- }
6227
- case Type_B:
6228
- {
6229
- retVal = (int64_t )((int8_t )value);
6230
- break ;
6231
- }
6232
- default :
6233
- {
6234
- // Dont do float conversions
6235
- retVal = value;
6236
- }
6237
- }
6238
- return retVal;
6239
- }
6240
-
6241
6156
G4_RegVar *
6242
6157
G4_RegVarTransient::getNonTransientBaseRegVar ()
6243
6158
{
@@ -6411,7 +6326,7 @@ void G4_SrcRegRegion::computeLeftBound()
6411
6326
6412
6327
void G4_SrcRegRegion::setSrcBitVec (uint8_t exec_size)
6413
6328
{
6414
- unsigned short bit_seq = G4_Type_Table[type].footprint ;
6329
+ uint64_t bit_seq = G4_Type_Table[type].footprint ;
6415
6330
unsigned short typeSize = (unsigned short )G4_Type_Table[type].byteSize ;
6416
6331
6417
6332
uint64_t footPrint0 = 0 ;
@@ -6424,24 +6339,14 @@ void G4_SrcRegRegion::setSrcBitVec(uint8_t exec_size)
6424
6339
}
6425
6340
else if (desc->isContiguous (exec_size))
6426
6341
{
6342
+ // fast path
6427
6343
int totalBytes = exec_size * typeSize;
6428
- MUST_BE_TRUE (totalBytes <= 2 * getGRFSize (), " total bits exceeds 2 GRFs" );
6429
- if (totalBytes == getGRFSize () * 2 )
6430
- {
6431
- footPrint0 = ULLONG_MAX;
6432
- footPrint1 = ULLONG_MAX;
6433
- }
6434
- else
6435
- {
6436
- if (totalBytes <= getGRFSize ())
6437
- {
6438
- footPrint0 = (1ULL << totalBytes) - 1 ;
6439
- }
6440
- else
6441
- {
6442
- footPrint0 = ULLONG_MAX;
6443
- footPrint1 = (1ULL << (totalBytes - getGRFSize ())) - 1 ;
6444
- }
6344
+ MUST_BE_TRUE (totalBytes <= 2 * getGRFSize (), " total bytes exceed 2 GRFs" );
6345
+
6346
+ footPrint0 = totalBytes < 64 ? (1ULL << totalBytes) - 1 : ULLONG_MAX;
6347
+ if (totalBytes > 64 )
6348
+ {
6349
+ footPrint1 = totalBytes == 128 ? ULLONG_MAX : (1ULL << (totalBytes - 64 )) - 1 ;
6445
6350
}
6446
6351
}
6447
6352
else
@@ -6451,28 +6356,21 @@ void G4_SrcRegRegion::setSrcBitVec(uint8_t exec_size)
6451
6356
for (int j = 0 ; j < desc->width ; ++j)
6452
6357
{
6453
6358
int eltOffset = i * desc->vertStride * typeSize + j * desc->horzStride * typeSize;
6454
-
6455
- if (eltOffset >= getGRFSize ())
6456
- {
6457
- footPrint1 |= ((uint64_t )bit_seq) << (eltOffset - getGRFSize ());
6458
- }
6459
- else if (eltOffset + G4_Type_Table[type].byteSize < getGRFSize ())
6359
+ // no element can cross 64-byte boundary
6360
+ if (eltOffset >= 64 )
6460
6361
{
6461
- footPrint0 |= (( uint64_t ) bit_seq) << eltOffset;
6362
+ footPrint1 |= bit_seq << ( eltOffset - 64 ) ;
6462
6363
}
6463
6364
else
6464
6365
{
6465
- footPrint0 |= ((uint64_t )bit_seq) << eltOffset;
6466
- footPrint1 |= ((uint64_t )bit_seq) >> (getGRFSize () - eltOffset);
6366
+ footPrint0 |= bit_seq << eltOffset;
6467
6367
}
6468
6368
}
6469
6369
}
6470
6370
}
6471
6371
6472
6372
bitVec[0 ] = footPrint0;
6473
6373
bitVec[1 ] = footPrint1;
6474
-
6475
- return ;
6476
6374
}
6477
6375
6478
6376
unsigned G4_SrcRegRegion::computeRightBound ( uint8_t exec_size )
@@ -6905,6 +6803,76 @@ void G4_INST::computeLeftBoundForImplAcc(G4_Operand* opnd)
6905
6803
}
6906
6804
}
6907
6805
6806
+ //
6807
+ // Normalize an operand's bitvec footprint based on its left bound
6808
+ // and update the given bitset.
6809
+ // If isSet is true, we set all bits that are covered by this operand.
6810
+ // If isSet os false, we clear all bits that are covered by this operand.
6811
+ //
6812
+ void G4_Operand::updateFootPrint (BitSet& footprint, bool isSet)
6813
+ {
6814
+ unsigned N = NUM_BITS_PER_ELT;
6815
+ unsigned lb = getLeftBound ();
6816
+ unsigned rb = getRightBound ();
6817
+ const bool doFastPath = true ; // for debugging
6818
+
6819
+ if (doFastPath && lb % N == 0 && (rb + 1 ) % N == 0 )
6820
+ {
6821
+ // lb is 32-byte aligned, set one dword at a time
6822
+ unsigned idx = lb / N;
6823
+ unsigned endIdx = rb / N;
6824
+ // get the precise footprint for the first two GRF
6825
+ for (int i = 0 ; i < 2 && idx <= endIdx; ++i, ++idx)
6826
+ {
6827
+ uint64_t bits = getBitVecL ();
6828
+ uint32_t bitVal = (uint32_t )(i % 2 ? bits >> N : bits);
6829
+ if (isSet)
6830
+ {
6831
+ footprint.setElt (idx, bitVal);
6832
+ }
6833
+ else
6834
+ {
6835
+ footprint.resetElt (idx, bitVal);
6836
+ }
6837
+ }
6838
+
6839
+ // beyond the first two GRF we assume every byte is touched
6840
+ while (idx <= endIdx)
6841
+ {
6842
+ if (isSet)
6843
+ {
6844
+ footprint.setElt (idx, 0xFFFFFFFF );
6845
+ }
6846
+ else
6847
+ {
6848
+ footprint.resetElt (idx, 0xFFFFFFFF );
6849
+ }
6850
+ idx++;
6851
+ }
6852
+ }
6853
+ else
6854
+ {
6855
+ // handle unaligned case
6856
+ uint64_t mask0 = getBitVecL ();
6857
+ unsigned j = lb;
6858
+ for (unsigned i = 0 ; i < 64 && j <= rb; ++i, ++j)
6859
+ {
6860
+ if (mask0 & (1ULL << i))
6861
+ footprint.set (j, isSet);
6862
+ }
6863
+ while (j++ <= rb)
6864
+ footprint.set (j, isSet);
6865
+ }
6866
+ }
6867
+
6868
+ // update bit vector for this operand based on it size
6869
+ // We assume all bytes are touched
6870
+ void G4_Operand::setBitVecFromSize (uint32_t NBytes)
6871
+ {
6872
+ bitVec[0 ] = NBytes < 64 ? (1ULL << NBytes) - 1 : ULLONG_MAX;
6873
+ bitVec[1 ] = 0 ;
6874
+ }
6875
+
6908
6876
// Left and right bound for every operand is based off
6909
6877
// top most dcl.
6910
6878
// For flag register as dst/src/pred/cond mod, each bit of
@@ -7019,23 +6987,7 @@ void G4_InstSend::computeRightBound(G4_Operand* opnd)
7019
6987
LB + numReg * G4_GRF_REG_NBYTES) - 1 ;
7020
6988
7021
6989
unsigned NBytes = RB - LB + 1 ;
7022
- if (NBytes <= 32 )
7023
- {
7024
- uint32_t Mask = uint32_t (-1 ) >> (32 - NBytes);
7025
- opnd->setBitVecL (Mask);
7026
- }
7027
- else if (NBytes <= 64 )
7028
- {
7029
- opnd->setBitVecL (0xFFFFFFFF );
7030
- uint32_t Mask = uint32_t (-1 ) >> (64 - NBytes);
7031
- opnd->setBitVecH (Mask);
7032
- }
7033
- else
7034
- {
7035
- // NBytes > 64
7036
- opnd->setBitVecL (0xFFFFFFFF );
7037
- opnd->setBitVecH (0xFFFFFFFF );
7038
- }
6990
+ opnd->setBitVecFromSize (NBytes);
7039
6991
opnd->setRightBound (RB);
7040
6992
};
7041
6993
0 commit comments