@@ -423,13 +423,14 @@ void ConstantCoalescing::ProcessBlock(
423
423
424
424
uint offsetInBytes = 0 ;
425
425
Value* baseOffsetInBytes = nullptr ;
426
+ ExtensionKind Extension = EK_NotExtended;
426
427
if (ConstantInt * offsetConstVal = dyn_cast<ConstantInt>(ldRaw->getOffsetValue ()))
427
428
{
428
429
offsetInBytes = int_cast<uint>(offsetConstVal->getZExtValue ());
429
430
}
430
431
else
431
432
{
432
- baseOffsetInBytes = SimpleBaseOffset (ldRaw->getOffsetValue (), offsetInBytes);
433
+ baseOffsetInBytes = SimpleBaseOffset (ldRaw->getOffsetValue (), offsetInBytes, Extension );
433
434
}
434
435
if ((int32_t )offsetInBytes >= 0 )
435
436
{
@@ -448,6 +449,7 @@ void ConstantCoalescing::ProcessBlock(
448
449
baseOffsetInBytes,
449
450
offsetInBytes,
450
451
maxEltPlus,
452
+ Extension,
451
453
baseOffsetInBytes ? indcb_owloads : dircb_owloads);
452
454
}
453
455
else if (bufType == BINDLESS_CONSTANT_BUFFER
@@ -473,6 +475,7 @@ void ConstantCoalescing::ProcessBlock(
473
475
baseOffsetInBytes,
474
476
offsetInBytes,
475
477
maxEltPlus,
478
+ Extension,
476
479
indcb_gathers);
477
480
}
478
481
}
@@ -485,6 +488,7 @@ void ConstantCoalescing::ProcessBlock(
485
488
baseOffsetInBytes,
486
489
offsetInBytes,
487
490
maxEltPlus,
491
+ Extension,
488
492
indcb_gathers);
489
493
}
490
494
}
@@ -518,7 +522,8 @@ void ConstantCoalescing::ProcessBlock(
518
522
Value* buf_idxv = nullptr ;
519
523
Value* elt_idxv = nullptr ;
520
524
uint offsetInBytes = 0 ;
521
- if (DecomposePtrExp (LI->getPointerOperand (), buf_idxv, elt_idxv, offsetInBytes))
525
+ ExtensionKind Extension = EK_NotExtended;
526
+ if (DecomposePtrExp (LI->getPointerOperand (), buf_idxv, elt_idxv, offsetInBytes, Extension))
522
527
{
523
528
// TODO: Disabling constant coalescing when we see that the offset to the constant buffer is negative
524
529
// As we handle all negative offsets as uint, some arithmetic operations do not work well. Needs a more detailed fix
@@ -527,13 +532,13 @@ void ConstantCoalescing::ProcessBlock(
527
532
if (wiAns->isUniform (LI))
528
533
{ // uniform
529
534
if (elt_idxv)
530
- MergeUniformLoad (LI, buf_idxv, 0 , elt_idxv, offsetInBytes, maxEltPlus, indcb_owloads);
535
+ MergeUniformLoad (LI, buf_idxv, 0 , elt_idxv, offsetInBytes, maxEltPlus, Extension, indcb_owloads);
531
536
else
532
- MergeUniformLoad (LI, buf_idxv, 0 , nullptr , offsetInBytes, maxEltPlus, dircb_owloads);
537
+ MergeUniformLoad (LI, buf_idxv, 0 , nullptr , offsetInBytes, maxEltPlus, Extension, dircb_owloads);
533
538
}
534
539
else
535
540
{ // not uniform
536
- MergeScatterLoad (LI, buf_idxv, 0 , elt_idxv, offsetInBytes, maxEltPlus, indcb_gathers);
541
+ MergeScatterLoad (LI, buf_idxv, 0 , elt_idxv, offsetInBytes, maxEltPlus, Extension, indcb_gathers);
537
542
}
538
543
}
539
544
}
@@ -552,7 +557,7 @@ void ConstantCoalescing::ProcessBlock(
552
557
continue ;
553
558
if (isa<ConstantPointerNull>(elt_ptrv))
554
559
{
555
- MergeUniformLoad (LI, nullptr , addrSpace, nullptr , 0 , maxEltPlus, dircb_owloads);
560
+ MergeUniformLoad (LI, nullptr , addrSpace, nullptr , 0 , maxEltPlus, EK_NotExtended, dircb_owloads);
556
561
}
557
562
else if (isa<IntToPtrInst>(elt_ptrv))
558
563
{
@@ -565,20 +570,21 @@ void ConstantCoalescing::ProcessBlock(
565
570
// As we handle all negative offsets as uint, some arithmetic operations do not work well. Needs a more detailed fix
566
571
if ((int32_t )offsetInBytes >= 0 )
567
572
{
568
- MergeUniformLoad (LI, nullptr , addrSpace, nullptr , offsetInBytes, maxEltPlus, dircb_owloads);
573
+ MergeUniformLoad (LI, nullptr , addrSpace, nullptr , offsetInBytes, maxEltPlus, EK_NotExtended, dircb_owloads);
569
574
}
570
575
}
571
576
else
572
577
{ // indirect access
573
578
uint offsetInBytes = 0 ;
574
- elt_idxv = SimpleBaseOffset (elt_idxv, offsetInBytes);
579
+ ExtensionKind Extension = EK_NotExtended;
580
+ elt_idxv = SimpleBaseOffset (elt_idxv, offsetInBytes, Extension);
575
581
// TODO: Disabling constant coalescing when we see that the offset to the constant buffer is negative
576
582
// As we handle all negative offsets as uint, some arithmetic operations do not work well. Needs a more detailed fix
577
583
if ((int32_t )offsetInBytes >= 0 )
578
584
{
579
585
if (wiAns->isUniform (LI))
580
586
{ // uniform
581
- MergeUniformLoad (LI, nullptr , addrSpace, elt_idxv, offsetInBytes, maxEltPlus, indcb_owloads);
587
+ MergeUniformLoad (LI, nullptr , addrSpace, elt_idxv, offsetInBytes, maxEltPlus, Extension, indcb_owloads);
582
588
}
583
589
else if (bufType == CONSTANT_BUFFER)
584
590
{ // not uniform
@@ -595,6 +601,7 @@ void ConstantCoalescing::ProcessBlock(
595
601
elt_idxv,
596
602
offsetInBytes,
597
603
maxEltPlus,
604
+ Extension,
598
605
indcb_gathers);
599
606
}
600
607
}
@@ -788,6 +795,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
788
795
Value* bufIdxV, uint addrSpace,
789
796
Value* eltIdxV, uint offsetInBytes,
790
797
uint maxEltPlus,
798
+ const ExtensionKind& Extension,
791
799
std::vector<BufChunk*>& chunk_vec)
792
800
{
793
801
const uint scalarSizeInBytes = load->getType ()->getScalarSizeInBits () / 8 ;
@@ -852,7 +860,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
852
860
cov_chunk->chunkStart = eltid;
853
861
cov_chunk->chunkSize = maxEltPlus;
854
862
const uint chunkAlignment = std::max<uint>(alignment, 4 );
855
- cov_chunk->chunkIO = CreateChunkLoad (load, cov_chunk, eltid, chunkAlignment);
863
+ cov_chunk->chunkIO = CreateChunkLoad (load, cov_chunk, eltid, chunkAlignment, Extension );
856
864
857
865
// Update load alignment if needed, set it to DWORD aligned
858
866
if (alignment < 4 )
@@ -867,7 +875,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
867
875
{
868
876
// combine the initial scalar loads with this incoming load (which can be a vector-load),
869
877
// then add extracts
870
- CombineTwoLoads (cov_chunk, load, eltid, maxEltPlus);
878
+ CombineTwoLoads (cov_chunk, load, eltid, maxEltPlus, Extension );
871
879
}
872
880
else if (load->getType ()->isVectorTy ())
873
881
{
@@ -885,7 +893,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
885
893
}
886
894
else
887
895
{
888
- AdjustChunk (cov_chunk, start_adj, size_adj);
896
+ AdjustChunk (cov_chunk, start_adj, size_adj, Extension );
889
897
}
890
898
MoveExtracts (cov_chunk, load, (eltid - cov_chunk->chunkStart ));
891
899
}
@@ -910,7 +918,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
910
918
}
911
919
else if (start_adj > 0 )
912
920
{
913
- splitter = AdjustChunkAddExtract (cov_chunk, start_adj, size_adj, eltid);
921
+ splitter = AdjustChunkAddExtract (cov_chunk, start_adj, size_adj, eltid, Extension );
914
922
}
915
923
else if (size_adj > 0 )
916
924
{
@@ -922,7 +930,8 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
922
930
923
931
}
924
932
925
- Value* ConstantCoalescing::FormChunkAddress (BufChunk* chunk)
933
+ Value* ConstantCoalescing::FormChunkAddress (
934
+ BufChunk* chunk, const ExtensionKind &Extension)
926
935
{
927
936
IGC_ASSERT (nullptr != chunk);
928
937
IGC_ASSERT_MESSAGE ((chunk->bufIdxV || chunk->baseIdxV ), " at least one!" );
@@ -959,7 +968,10 @@ Value* ConstantCoalescing::FormChunkAddress(BufChunk* chunk)
959
968
if (eac->getType ()->getPrimitiveSizeInBits () <
960
969
bufsrc->getType ()->getPrimitiveSizeInBits ())
961
970
{
962
- eac = irBuilder->CreateZExt (eac, bufsrc->getType ());
971
+ if (Extension == EK_SignExt)
972
+ eac = irBuilder->CreateSExt (eac, bufsrc->getType ());
973
+ else
974
+ eac = irBuilder->CreateZExt (eac, bufsrc->getType ());
963
975
wiAns->incUpdateDepend (eac, uniformness);
964
976
}
965
977
IGC_ASSERT (eac->getType () == bufsrc->getType ());
@@ -979,7 +991,8 @@ Value* ConstantCoalescing::FormChunkAddress(BufChunk* chunk)
979
991
return eac;
980
992
}
981
993
982
- void ConstantCoalescing::CombineTwoLoads (BufChunk* cov_chunk, Instruction* load, uint eltid, uint numelt)
994
+ void ConstantCoalescing::CombineTwoLoads (
995
+ BufChunk* cov_chunk, Instruction* load, uint eltid, uint numelt, const ExtensionKind &Extension)
983
996
{
984
997
uint eltid0 = cov_chunk->chunkStart ;
985
998
uint lb = std::min (eltid0, eltid);
@@ -1013,7 +1026,7 @@ void ConstantCoalescing::CombineTwoLoads(BufChunk* cov_chunk, Instruction* load,
1013
1026
// modify the address calculation if the chunk-start is changed
1014
1027
if (eltid0 != cov_chunk->chunkStart )
1015
1028
{
1016
- eac = FormChunkAddress (cov_chunk);
1029
+ eac = FormChunkAddress (cov_chunk, Extension );
1017
1030
}
1018
1031
// new IntToPtr and new load
1019
1032
// cannot use irbuilder to create IntToPtr. It may create ConstantExpr instead of instruction
@@ -1160,6 +1173,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction* load,
1160
1173
Value* bufIdxV, uint addrSpace,
1161
1174
Value* eltIdxV, uint offsetInBytes,
1162
1175
uint maxEltPlus,
1176
+ const ExtensionKind &Extension,
1163
1177
std::vector<BufChunk*>& chunk_vec)
1164
1178
{
1165
1179
const uint alignment = GetAlignment (load);
@@ -1234,7 +1248,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction* load,
1234
1248
cov_chunk->chunkStart = eltid;
1235
1249
cov_chunk->chunkSize = iSTD::RoundPower2 ((DWORD)maxEltPlus);
1236
1250
const uint chunkAlignment = std::max<uint>(alignment, 4 );
1237
- cov_chunk->chunkIO = CreateChunkLoad (load, cov_chunk, eltid, chunkAlignment);
1251
+ cov_chunk->chunkIO = CreateChunkLoad (load, cov_chunk, eltid, chunkAlignment, Extension );
1238
1252
chunk_vec.push_back (cov_chunk);
1239
1253
}
1240
1254
}
@@ -1282,7 +1296,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction* load,
1282
1296
}
1283
1297
else
1284
1298
{
1285
- AdjustChunk (cov_chunk, start_adj, size_adj);
1299
+ AdjustChunk (cov_chunk, start_adj, size_adj, Extension );
1286
1300
}
1287
1301
MoveExtracts (cov_chunk, load, eltid - cov_chunk->chunkStart );
1288
1302
}
@@ -1309,7 +1323,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction* load,
1309
1323
}
1310
1324
else if (start_adj > 0 )
1311
1325
{
1312
- splitter = AdjustChunkAddExtract (cov_chunk, start_adj, size_adj, eltid);
1326
+ splitter = AdjustChunkAddExtract (cov_chunk, start_adj, size_adj, eltid, Extension );
1313
1327
}
1314
1328
else if (size_adj > 0 )
1315
1329
{
@@ -1398,15 +1412,30 @@ uint ConstantCoalescing::GetOffsetAlignment(Value* val) const
1398
1412
return 1 ;
1399
1413
}
1400
1414
1401
- Value* ConstantCoalescing::SimpleBaseOffset (Value* elt_idxv, uint& offset)
1415
+ Value* ConstantCoalescing::SimpleBaseOffset (
1416
+ Value* elt_idxv, uint& offset, ExtensionKind &Extension)
1402
1417
{
1403
1418
// in case expression comes from a smaller type arithmetic
1404
1419
if (ZExtInst * reducedOffset = dyn_cast<ZExtInst>(elt_idxv))
1405
1420
{
1421
+ if (Extension == EK_SignExt)
1422
+ {
1423
+ offset = 0 ;
1424
+ return elt_idxv;
1425
+ }
1426
+
1427
+ Extension = EK_ZeroExt;
1406
1428
elt_idxv = reducedOffset->getOperand (0 );
1407
1429
}
1408
1430
if (SExtInst * reducedOffset = dyn_cast<SExtInst>(elt_idxv))
1409
1431
{
1432
+ if (Extension == EK_ZeroExt)
1433
+ {
1434
+ offset = 0 ;
1435
+ return elt_idxv;
1436
+ }
1437
+
1438
+ Extension = EK_SignExt;
1410
1439
elt_idxv = reducedOffset->getOperand (0 );
1411
1440
}
1412
1441
@@ -1432,7 +1461,7 @@ Value* ConstantCoalescing::SimpleBaseOffset(Value* elt_idxv, uint& offset)
1432
1461
// %535 = or i32 %519, 12
1433
1462
// %537 = add i32 %535, 16
1434
1463
uint offset1 = 0 ;
1435
- Value* base = SimpleBaseOffset (src0, offset1);
1464
+ Value* base = SimpleBaseOffset (src0, offset1, Extension );
1436
1465
offset = offset1 + static_cast <uint>(csrc1->getZExtValue ());
1437
1466
return base;
1438
1467
}
@@ -1528,7 +1557,8 @@ static Value *getPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
1528
1557
}
1529
1558
1530
1559
bool ConstantCoalescing::DecomposePtrExp (
1531
- Value* ptr_val, Value*& buf_idxv, Value*& elt_idxv, uint& offset)
1560
+ Value* ptr_val, Value*& buf_idxv, Value*& elt_idxv, uint& offset,
1561
+ ExtensionKind &Extension)
1532
1562
{
1533
1563
buf_idxv = ptr_val;
1534
1564
elt_idxv = nullptr ;
@@ -1569,7 +1599,7 @@ bool ConstantCoalescing::DecomposePtrExp(
1569
1599
}
1570
1600
else
1571
1601
{
1572
- elt_idxv = SimpleBaseOffset (src1, offset);
1602
+ elt_idxv = SimpleBaseOffset (src1, offset, Extension );
1573
1603
}
1574
1604
return true ;
1575
1605
}
@@ -1583,7 +1613,7 @@ bool ConstantCoalescing::DecomposePtrExp(
1583
1613
}
1584
1614
else
1585
1615
{
1586
- elt_idxv = SimpleBaseOffset (src0, offset);
1616
+ elt_idxv = SimpleBaseOffset (src0, offset, Extension );
1587
1617
}
1588
1618
return true ;
1589
1619
}
@@ -1647,7 +1677,8 @@ uint ConstantCoalescing::CheckVectorElementUses(const Instruction* load)
1647
1677
return maxEltPlus;
1648
1678
}
1649
1679
1650
- Instruction* ConstantCoalescing::CreateChunkLoad (Instruction* seedi, BufChunk* chunk, uint eltid, uint alignment)
1680
+ Instruction* ConstantCoalescing::CreateChunkLoad (
1681
+ Instruction* seedi, BufChunk* chunk, uint eltid, uint alignment, const ExtensionKind &Extension)
1651
1682
{
1652
1683
irBuilder->SetInsertPoint (seedi);
1653
1684
if (LoadInst * load = dyn_cast<LoadInst>(seedi))
@@ -1666,7 +1697,7 @@ Instruction* ConstantCoalescing::CreateChunkLoad(Instruction* seedi, BufChunk* c
1666
1697
if (eltid == chunk->chunkStart && isa<IntToPtrInst>(eac))
1667
1698
eac = dyn_cast<IntToPtrInst>(eac)->getOperand (0 );
1668
1699
else
1669
- eac = FormChunkAddress (chunk);
1700
+ eac = FormChunkAddress (chunk, Extension );
1670
1701
}
1671
1702
else
1672
1703
{
@@ -1794,7 +1825,8 @@ Instruction* ConstantCoalescing::FindOrAddChunkExtract(BufChunk* cov_chunk, uint
1794
1825
return splitter;
1795
1826
}
1796
1827
1797
- void ConstantCoalescing::AdjustChunk (BufChunk* cov_chunk, uint start_adj, uint size_adj)
1828
+ void ConstantCoalescing::AdjustChunk (
1829
+ BufChunk* cov_chunk, uint start_adj, uint size_adj, const ExtensionKind &Extension)
1798
1830
{
1799
1831
cov_chunk->chunkSize += size_adj;
1800
1832
cov_chunk->chunkStart -= start_adj;
@@ -1836,7 +1868,7 @@ void ConstantCoalescing::AdjustChunk(BufChunk* cov_chunk, uint start_adj, uint s
1836
1868
Instruction* expr2 = dyn_cast<Instruction>(expr->getOperand (srcIdx));
1837
1869
if (expr2 && expr2->hasOneUse ())
1838
1870
{
1839
- if (isa<ZExtInst>(expr2) && isa<BinaryOperator>(expr2->getOperand (0 )))
1871
+ if (( isa<ZExtInst>(expr2) || isa<SExtInst>(expr2) ) && isa<BinaryOperator>(expr2->getOperand (0 )))
1840
1872
expr2 = cast<Instruction>(expr2->getOperand (0 ));
1841
1873
IGC_ASSERT (isa<BinaryOperator>(expr2));
1842
1874
@@ -1861,7 +1893,7 @@ void ConstantCoalescing::AdjustChunk(BufChunk* cov_chunk, uint start_adj, uint s
1861
1893
if (!foundOffset)
1862
1894
{
1863
1895
// if we cannot modify the offset, create a new chain of address calculation
1864
- eac = FormChunkAddress (cov_chunk);
1896
+ eac = FormChunkAddress (cov_chunk, Extension );
1865
1897
cast<Instruction>(addr_ptr)->setOperand (0 , eac);
1866
1898
}
1867
1899
}
@@ -1973,9 +2005,10 @@ void ConstantCoalescing::AdjustChunk(BufChunk* cov_chunk, uint start_adj, uint s
1973
2005
}
1974
2006
}
1975
2007
1976
- Instruction* ConstantCoalescing::AdjustChunkAddExtract (BufChunk* cov_chunk, uint start_adj, uint size_adj, uint eltid)
2008
+ Instruction* ConstantCoalescing::AdjustChunkAddExtract (
2009
+ BufChunk* cov_chunk, uint start_adj, uint size_adj, uint eltid, const ExtensionKind &Extension)
1977
2010
{
1978
- AdjustChunk (cov_chunk, start_adj, size_adj);
2011
+ AdjustChunk (cov_chunk, start_adj, size_adj, Extension );
1979
2012
return AddChunkExtract (cov_chunk->chunkIO , eltid - cov_chunk->chunkStart );
1980
2013
}
1981
2014
0 commit comments