Skip to content

Commit d96ab95

Browse files
scottp101 authored and igcbot committed
Properly extend integer in ConstantCoalescing.
Original program did:

    %1398 = add i32 %1386, 96
    %1399 = sext i32 %1398 to i64
    %1400 = add i64 %39, %1399
    %1401 = inttoptr i64 %1400 to <3 x float> addrspace(2)*
    %1402 = load <3 x float>, <3 x float> addrspace(2)* %1401

After the pass (and vectorized):

    %0 = add i32 %i1522, 96
    %1 = zext i32 %0 to i64
    %2 = add i64 %i68, %1
    %chunkPtr = inttoptr i64 %2 to <8 x float> addrspace(2)*
    %3 = load <8 x float>, <8 x float> addrspace(2)* %chunkPtr

Note that the sext was converted to a zext. This change retains the sext.
1 parent d7528ab commit d96ab95

File tree

2 files changed

+89
-39
lines changed

2 files changed

+89
-39
lines changed

IGC/Compiler/CISACodeGen/ConstantCoalescing.cpp

Lines changed: 65 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -423,13 +423,14 @@ void ConstantCoalescing::ProcessBlock(
423423

424424
uint offsetInBytes = 0;
425425
Value* baseOffsetInBytes = nullptr;
426+
ExtensionKind Extension = EK_NotExtended;
426427
if (ConstantInt * offsetConstVal = dyn_cast<ConstantInt>(ldRaw->getOffsetValue()))
427428
{
428429
offsetInBytes = int_cast<uint>(offsetConstVal->getZExtValue());
429430
}
430431
else
431432
{
432-
baseOffsetInBytes = SimpleBaseOffset(ldRaw->getOffsetValue(), offsetInBytes);
433+
baseOffsetInBytes = SimpleBaseOffset(ldRaw->getOffsetValue(), offsetInBytes, Extension);
433434
}
434435
if ((int32_t)offsetInBytes >= 0)
435436
{
@@ -448,6 +449,7 @@ void ConstantCoalescing::ProcessBlock(
448449
baseOffsetInBytes,
449450
offsetInBytes,
450451
maxEltPlus,
452+
Extension,
451453
baseOffsetInBytes ? indcb_owloads : dircb_owloads);
452454
}
453455
else if (bufType == BINDLESS_CONSTANT_BUFFER
@@ -473,6 +475,7 @@ void ConstantCoalescing::ProcessBlock(
473475
baseOffsetInBytes,
474476
offsetInBytes,
475477
maxEltPlus,
478+
Extension,
476479
indcb_gathers);
477480
}
478481
}
@@ -485,6 +488,7 @@ void ConstantCoalescing::ProcessBlock(
485488
baseOffsetInBytes,
486489
offsetInBytes,
487490
maxEltPlus,
491+
Extension,
488492
indcb_gathers);
489493
}
490494
}
@@ -518,7 +522,8 @@ void ConstantCoalescing::ProcessBlock(
518522
Value* buf_idxv = nullptr;
519523
Value* elt_idxv = nullptr;
520524
uint offsetInBytes = 0;
521-
if (DecomposePtrExp(LI->getPointerOperand(), buf_idxv, elt_idxv, offsetInBytes))
525+
ExtensionKind Extension = EK_NotExtended;
526+
if (DecomposePtrExp(LI->getPointerOperand(), buf_idxv, elt_idxv, offsetInBytes, Extension))
522527
{
523528
// TODO: Disabling constant coalescing when we see that the offset to the constant buffer is negative
524529
// As we handle all negative offsets as uint and some arithmetic operations do not work well. Needs more detailed fix
@@ -527,13 +532,13 @@ void ConstantCoalescing::ProcessBlock(
527532
if (wiAns->isUniform(LI))
528533
{ // uniform
529534
if (elt_idxv)
530-
MergeUniformLoad(LI, buf_idxv, 0, elt_idxv, offsetInBytes, maxEltPlus, indcb_owloads);
535+
MergeUniformLoad(LI, buf_idxv, 0, elt_idxv, offsetInBytes, maxEltPlus, Extension, indcb_owloads);
531536
else
532-
MergeUniformLoad(LI, buf_idxv, 0, nullptr, offsetInBytes, maxEltPlus, dircb_owloads);
537+
MergeUniformLoad(LI, buf_idxv, 0, nullptr, offsetInBytes, maxEltPlus, Extension, dircb_owloads);
533538
}
534539
else
535540
{ // not uniform
536-
MergeScatterLoad(LI, buf_idxv, 0, elt_idxv, offsetInBytes, maxEltPlus, indcb_gathers);
541+
MergeScatterLoad(LI, buf_idxv, 0, elt_idxv, offsetInBytes, maxEltPlus, Extension, indcb_gathers);
537542
}
538543
}
539544
}
@@ -552,7 +557,7 @@ void ConstantCoalescing::ProcessBlock(
552557
continue;
553558
if (isa<ConstantPointerNull>(elt_ptrv))
554559
{
555-
MergeUniformLoad(LI, nullptr, addrSpace, nullptr, 0, maxEltPlus, dircb_owloads);
560+
MergeUniformLoad(LI, nullptr, addrSpace, nullptr, 0, maxEltPlus, EK_NotExtended, dircb_owloads);
556561
}
557562
else if (isa<IntToPtrInst>(elt_ptrv))
558563
{
@@ -565,20 +570,21 @@ void ConstantCoalescing::ProcessBlock(
565570
// As we handle all negative offsets as uint and some arithmetic operations do not work well. Needs more detailed fix
566571
if ((int32_t)offsetInBytes >= 0)
567572
{
568-
MergeUniformLoad(LI, nullptr, addrSpace, nullptr, offsetInBytes, maxEltPlus, dircb_owloads);
573+
MergeUniformLoad(LI, nullptr, addrSpace, nullptr, offsetInBytes, maxEltPlus, EK_NotExtended, dircb_owloads);
569574
}
570575
}
571576
else
572577
{ // indirect access
573578
uint offsetInBytes = 0;
574-
elt_idxv = SimpleBaseOffset(elt_idxv, offsetInBytes);
579+
ExtensionKind Extension = EK_NotExtended;
580+
elt_idxv = SimpleBaseOffset(elt_idxv, offsetInBytes, Extension);
575581
// TODO: Disabling constant coalescing when we see that the offset to the constant buffer is negative
576582
// As we handle all negative offsets as uint and some arithmetic operations do not work well. Needs more detailed fix
577583
if ((int32_t)offsetInBytes >= 0)
578584
{
579585
if (wiAns->isUniform(LI))
580586
{ // uniform
581-
MergeUniformLoad(LI, nullptr, addrSpace, elt_idxv, offsetInBytes, maxEltPlus, indcb_owloads);
587+
MergeUniformLoad(LI, nullptr, addrSpace, elt_idxv, offsetInBytes, maxEltPlus, Extension, indcb_owloads);
582588
}
583589
else if (bufType == CONSTANT_BUFFER)
584590
{ // not uniform
@@ -595,6 +601,7 @@ void ConstantCoalescing::ProcessBlock(
595601
elt_idxv,
596602
offsetInBytes,
597603
maxEltPlus,
604+
Extension,
598605
indcb_gathers);
599606
}
600607
}
@@ -788,6 +795,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
788795
Value* bufIdxV, uint addrSpace,
789796
Value* eltIdxV, uint offsetInBytes,
790797
uint maxEltPlus,
798+
const ExtensionKind& Extension,
791799
std::vector<BufChunk*>& chunk_vec)
792800
{
793801
const uint scalarSizeInBytes = load->getType()->getScalarSizeInBits() / 8;
@@ -852,7 +860,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
852860
cov_chunk->chunkStart = eltid;
853861
cov_chunk->chunkSize = maxEltPlus;
854862
const uint chunkAlignment = std::max<uint>(alignment, 4);
855-
cov_chunk->chunkIO = CreateChunkLoad(load, cov_chunk, eltid, chunkAlignment);
863+
cov_chunk->chunkIO = CreateChunkLoad(load, cov_chunk, eltid, chunkAlignment, Extension);
856864

857865
// Update load alignment if needed, set it to DWORD aligned
858866
if (alignment < 4)
@@ -867,7 +875,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
867875
{
868876
// combine the initial scalar loads with this incoming load (which can be a vector-load),
869877
// then add extracts
870-
CombineTwoLoads(cov_chunk, load, eltid, maxEltPlus);
878+
CombineTwoLoads(cov_chunk, load, eltid, maxEltPlus, Extension);
871879
}
872880
else if (load->getType()->isVectorTy())
873881
{
@@ -885,7 +893,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
885893
}
886894
else
887895
{
888-
AdjustChunk(cov_chunk, start_adj, size_adj);
896+
AdjustChunk(cov_chunk, start_adj, size_adj, Extension);
889897
}
890898
MoveExtracts(cov_chunk, load, (eltid - cov_chunk->chunkStart));
891899
}
@@ -910,7 +918,7 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
910918
}
911919
else if (start_adj > 0)
912920
{
913-
splitter = AdjustChunkAddExtract(cov_chunk, start_adj, size_adj, eltid);
921+
splitter = AdjustChunkAddExtract(cov_chunk, start_adj, size_adj, eltid, Extension);
914922
}
915923
else if (size_adj > 0)
916924
{
@@ -922,7 +930,8 @@ void ConstantCoalescing::MergeScatterLoad(Instruction* load,
922930

923931
}
924932

925-
Value* ConstantCoalescing::FormChunkAddress(BufChunk* chunk)
933+
Value* ConstantCoalescing::FormChunkAddress(
934+
BufChunk* chunk, const ExtensionKind &Extension)
926935
{
927936
IGC_ASSERT(nullptr != chunk);
928937
IGC_ASSERT_MESSAGE((chunk->bufIdxV || chunk->baseIdxV), "at least one!");
@@ -959,7 +968,10 @@ Value* ConstantCoalescing::FormChunkAddress(BufChunk* chunk)
959968
if (eac->getType()->getPrimitiveSizeInBits() <
960969
bufsrc->getType()->getPrimitiveSizeInBits())
961970
{
962-
eac = irBuilder->CreateZExt(eac, bufsrc->getType());
971+
if (Extension == EK_SignExt)
972+
eac = irBuilder->CreateSExt(eac, bufsrc->getType());
973+
else
974+
eac = irBuilder->CreateZExt(eac, bufsrc->getType());
963975
wiAns->incUpdateDepend(eac, uniformness);
964976
}
965977
IGC_ASSERT(eac->getType() == bufsrc->getType());
@@ -979,7 +991,8 @@ Value* ConstantCoalescing::FormChunkAddress(BufChunk* chunk)
979991
return eac;
980992
}
981993

982-
void ConstantCoalescing::CombineTwoLoads(BufChunk* cov_chunk, Instruction* load, uint eltid, uint numelt)
994+
void ConstantCoalescing::CombineTwoLoads(
995+
BufChunk* cov_chunk, Instruction* load, uint eltid, uint numelt, const ExtensionKind &Extension)
983996
{
984997
uint eltid0 = cov_chunk->chunkStart;
985998
uint lb = std::min(eltid0, eltid);
@@ -1013,7 +1026,7 @@ void ConstantCoalescing::CombineTwoLoads(BufChunk* cov_chunk, Instruction* load,
10131026
// modify the address calculation if the chunk-start is changed
10141027
if (eltid0 != cov_chunk->chunkStart)
10151028
{
1016-
eac = FormChunkAddress(cov_chunk);
1029+
eac = FormChunkAddress(cov_chunk, Extension);
10171030
}
10181031
// new IntToPtr and new load
10191032
// cannot use irbuilder to create IntToPtr. It may create ConstantExpr instead of instruction
@@ -1160,6 +1173,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction* load,
11601173
Value* bufIdxV, uint addrSpace,
11611174
Value* eltIdxV, uint offsetInBytes,
11621175
uint maxEltPlus,
1176+
const ExtensionKind &Extension,
11631177
std::vector<BufChunk*>& chunk_vec)
11641178
{
11651179
const uint alignment = GetAlignment(load);
@@ -1234,7 +1248,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction* load,
12341248
cov_chunk->chunkStart = eltid;
12351249
cov_chunk->chunkSize = iSTD::RoundPower2((DWORD)maxEltPlus);
12361250
const uint chunkAlignment = std::max<uint>(alignment, 4);
1237-
cov_chunk->chunkIO = CreateChunkLoad(load, cov_chunk, eltid, chunkAlignment);
1251+
cov_chunk->chunkIO = CreateChunkLoad(load, cov_chunk, eltid, chunkAlignment, Extension);
12381252
chunk_vec.push_back(cov_chunk);
12391253
}
12401254
}
@@ -1282,7 +1296,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction* load,
12821296
}
12831297
else
12841298
{
1285-
AdjustChunk(cov_chunk, start_adj, size_adj);
1299+
AdjustChunk(cov_chunk, start_adj, size_adj, Extension);
12861300
}
12871301
MoveExtracts(cov_chunk, load, eltid - cov_chunk->chunkStart);
12881302
}
@@ -1309,7 +1323,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction* load,
13091323
}
13101324
else if (start_adj > 0)
13111325
{
1312-
splitter = AdjustChunkAddExtract(cov_chunk, start_adj, size_adj, eltid);
1326+
splitter = AdjustChunkAddExtract(cov_chunk, start_adj, size_adj, eltid, Extension);
13131327
}
13141328
else if (size_adj > 0)
13151329
{
@@ -1398,15 +1412,30 @@ uint ConstantCoalescing::GetOffsetAlignment(Value* val) const
13981412
return 1;
13991413
}
14001414

1401-
Value* ConstantCoalescing::SimpleBaseOffset(Value* elt_idxv, uint& offset)
1415+
Value* ConstantCoalescing::SimpleBaseOffset(
1416+
Value* elt_idxv, uint& offset, ExtensionKind &Extension)
14021417
{
14031418
// in case expression comes from a smaller type arithmetic
14041419
if (ZExtInst * reducedOffset = dyn_cast<ZExtInst>(elt_idxv))
14051420
{
1421+
if (Extension == EK_SignExt)
1422+
{
1423+
offset = 0;
1424+
return elt_idxv;
1425+
}
1426+
1427+
Extension = EK_ZeroExt;
14061428
elt_idxv = reducedOffset->getOperand(0);
14071429
}
14081430
if (SExtInst * reducedOffset = dyn_cast<SExtInst>(elt_idxv))
14091431
{
1432+
if (Extension == EK_ZeroExt)
1433+
{
1434+
offset = 0;
1435+
return elt_idxv;
1436+
}
1437+
1438+
Extension = EK_SignExt;
14101439
elt_idxv = reducedOffset->getOperand(0);
14111440
}
14121441

@@ -1432,7 +1461,7 @@ Value* ConstantCoalescing::SimpleBaseOffset(Value* elt_idxv, uint& offset)
14321461
// %535 = or i32 %519, 12
14331462
// %537 = add i32 %535, 16
14341463
uint offset1 = 0;
1435-
Value* base = SimpleBaseOffset(src0, offset1);
1464+
Value* base = SimpleBaseOffset(src0, offset1, Extension);
14361465
offset = offset1 + static_cast<uint>(csrc1->getZExtValue());
14371466
return base;
14381467
}
@@ -1528,7 +1557,8 @@ static Value *getPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
15281557
}
15291558

15301559
bool ConstantCoalescing::DecomposePtrExp(
1531-
Value* ptr_val, Value*& buf_idxv, Value*& elt_idxv, uint& offset)
1560+
Value* ptr_val, Value*& buf_idxv, Value*& elt_idxv, uint& offset,
1561+
ExtensionKind &Extension)
15321562
{
15331563
buf_idxv = ptr_val;
15341564
elt_idxv = nullptr;
@@ -1569,7 +1599,7 @@ bool ConstantCoalescing::DecomposePtrExp(
15691599
}
15701600
else
15711601
{
1572-
elt_idxv = SimpleBaseOffset(src1, offset);
1602+
elt_idxv = SimpleBaseOffset(src1, offset, Extension);
15731603
}
15741604
return true;
15751605
}
@@ -1583,7 +1613,7 @@ bool ConstantCoalescing::DecomposePtrExp(
15831613
}
15841614
else
15851615
{
1586-
elt_idxv = SimpleBaseOffset(src0, offset);
1616+
elt_idxv = SimpleBaseOffset(src0, offset, Extension);
15871617
}
15881618
return true;
15891619
}
@@ -1647,7 +1677,8 @@ uint ConstantCoalescing::CheckVectorElementUses(const Instruction* load)
16471677
return maxEltPlus;
16481678
}
16491679

1650-
Instruction* ConstantCoalescing::CreateChunkLoad(Instruction* seedi, BufChunk* chunk, uint eltid, uint alignment)
1680+
Instruction* ConstantCoalescing::CreateChunkLoad(
1681+
Instruction* seedi, BufChunk* chunk, uint eltid, uint alignment, const ExtensionKind &Extension)
16511682
{
16521683
irBuilder->SetInsertPoint(seedi);
16531684
if (LoadInst * load = dyn_cast<LoadInst>(seedi))
@@ -1666,7 +1697,7 @@ Instruction* ConstantCoalescing::CreateChunkLoad(Instruction* seedi, BufChunk* c
16661697
if (eltid == chunk->chunkStart && isa<IntToPtrInst>(eac))
16671698
eac = dyn_cast<IntToPtrInst>(eac)->getOperand(0);
16681699
else
1669-
eac = FormChunkAddress(chunk);
1700+
eac = FormChunkAddress(chunk, Extension);
16701701
}
16711702
else
16721703
{
@@ -1794,7 +1825,8 @@ Instruction* ConstantCoalescing::FindOrAddChunkExtract(BufChunk* cov_chunk, uint
17941825
return splitter;
17951826
}
17961827

1797-
void ConstantCoalescing::AdjustChunk(BufChunk* cov_chunk, uint start_adj, uint size_adj)
1828+
void ConstantCoalescing::AdjustChunk(
1829+
BufChunk* cov_chunk, uint start_adj, uint size_adj, const ExtensionKind &Extension)
17981830
{
17991831
cov_chunk->chunkSize += size_adj;
18001832
cov_chunk->chunkStart -= start_adj;
@@ -1836,7 +1868,7 @@ void ConstantCoalescing::AdjustChunk(BufChunk* cov_chunk, uint start_adj, uint s
18361868
Instruction* expr2 = dyn_cast<Instruction>(expr->getOperand(srcIdx));
18371869
if (expr2 && expr2->hasOneUse())
18381870
{
1839-
if (isa<ZExtInst>(expr2) && isa<BinaryOperator>(expr2->getOperand(0)))
1871+
if ((isa<ZExtInst>(expr2) || isa<SExtInst>(expr2)) && isa<BinaryOperator>(expr2->getOperand(0)))
18401872
expr2 = cast<Instruction>(expr2->getOperand(0));
18411873
IGC_ASSERT(isa<BinaryOperator>(expr2));
18421874

@@ -1861,7 +1893,7 @@ void ConstantCoalescing::AdjustChunk(BufChunk* cov_chunk, uint start_adj, uint s
18611893
if (!foundOffset)
18621894
{
18631895
// if we cannot modify the offset, create a new chain of address calculation
1864-
eac = FormChunkAddress(cov_chunk);
1896+
eac = FormChunkAddress(cov_chunk, Extension);
18651897
cast<Instruction>(addr_ptr)->setOperand(0, eac);
18661898
}
18671899
}
@@ -1973,9 +2005,10 @@ void ConstantCoalescing::AdjustChunk(BufChunk* cov_chunk, uint start_adj, uint s
19732005
}
19742006
}
19752007

1976-
Instruction* ConstantCoalescing::AdjustChunkAddExtract(BufChunk* cov_chunk, uint start_adj, uint size_adj, uint eltid)
2008+
Instruction* ConstantCoalescing::AdjustChunkAddExtract(
2009+
BufChunk* cov_chunk, uint start_adj, uint size_adj, uint eltid, const ExtensionKind &Extension)
19772010
{
1978-
AdjustChunk(cov_chunk, start_adj, size_adj);
2011+
AdjustChunk(cov_chunk, start_adj, size_adj, Extension);
19792012
return AddChunkExtract(cov_chunk->chunkIO, eltid - cov_chunk->chunkStart);
19802013
}
19812014

0 commit comments

Comments
 (0)