Skip to content

Commit 8d8f85a

Browse files
mmerecki, sys_zuul
authored and committed
Update the ConstantCoalescing pass: add the option of creating sampler loads with byte addresses. The current code uses addresses in 16-byte (OWORD) units.
Change-Id: Iea5a7ebcaf3e11b430f611ef6e6c9c7e89783b55
1 parent c815be9 commit 8d8f85a

File tree

2 files changed

+54
-32
lines changed

2 files changed

+54
-32
lines changed

IGC/Compiler/CISACodeGen/ConstantCoalescing.cpp

Lines changed: 51 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1816,6 +1816,8 @@ void ConstantCoalescing::ScatterToSampler(
18161816

18171817
assert(!load->getType()->isVectorTy() || load->getType()->getVectorNumElements() <= 4);
18181818

1819+
const bool useByteAddress = m_ctx->m_DriverInfo.UsesTypedConstantBuffersWithByteAddress();
1820+
18191821
// Code below doesn't support crossing 4 DWORD boundary i.e. mapping a
18201822
// single input load to multiple sampler loads.
18211823
const bool canBeLoadedUsingSampler =
@@ -1830,26 +1832,34 @@ void ConstantCoalescing::ScatterToSampler(
18301832

18311833
WIAnalysis::WIDependancy baseInBytesDep = wiAns->whichDepend(baseInBytes);
18321834

1833-
Value* baseAddressInOwords = GetSamplerAlignedAddress(baseInBytes);
1834-
assert(baseAddressInOwords);
1835-
1836-
// it is possible that baseInBytes is uniform, yet load is non-uniform due to the use location of load
1837-
if (baseAddressInOwords != baseInBytes->getOperand(0))
1838-
{
1839-
Value* newVal = irBuilder->CreateShl(baseAddressInOwords, ConstantInt::get(baseAddressInOwords->getType(), 4));
1840-
wiAns->incUpdateDepend(newVal, baseInBytesDep);
1841-
baseInBytes->replaceAllUsesWith(newVal);
1842-
}
1843-
else if (wiAns->whichDepend(baseAddressInOwords) != baseInBytesDep)
1835+
// Data address for sampler load, either in OWORDs or in bytes
1836+
Value* chunkBaseAddress = baseInBytes;
1837+
if (!useByteAddress)
18441838
{
1845-
// quick fix for a special case: baseAddressInOwords is uniform and baseInBytes is not uniform.
1846-
// If we use baseInBytes-src0 (elementIndx) directly at cf-join point by this transform,
1847-
// we can change the uniformness of baseAddressInOwords
1848-
baseAddressInOwords = irBuilder->CreateShl(baseAddressInOwords, ConstantInt::get(baseAddressInOwords->getType(), 0));
1849-
wiAns->incUpdateDepend(baseAddressInOwords, baseInBytesDep);
1850-
Value* newVal = irBuilder->CreateShl(baseAddressInOwords, ConstantInt::get(baseAddressInOwords->getType(), 4));
1851-
wiAns->incUpdateDepend(newVal, baseInBytesDep);
1852-
baseInBytes->replaceAllUsesWith(newVal);
1839+
// base address is in OWORDs
1840+
Value* baseAddressInOwords = GetSamplerAlignedAddress(baseInBytes);
1841+
assert(baseAddressInOwords);
1842+
1843+
// it is possible that baseInBytes is uniform, yet load is non-uniform due to the use location of load
1844+
if (baseAddressInOwords != baseInBytes->getOperand(0))
1845+
{
1846+
Value* newVal = irBuilder->CreateShl(baseAddressInOwords, ConstantInt::get(baseAddressInOwords->getType(), 4));
1847+
wiAns->incUpdateDepend(newVal, baseInBytesDep);
1848+
baseInBytes->replaceAllUsesWith(newVal);
1849+
}
1850+
else if (wiAns->whichDepend(baseAddressInOwords) != baseInBytesDep)
1851+
{
1852+
// quick fix for a special case: baseAddressInOwords is uniform and baseInBytes is not uniform.
1853+
// If we use baseInBytes-src0 (elementIndx) directly at cf-join point by this transform,
1854+
// we can change the uniformness of baseAddressInOwords
1855+
baseAddressInOwords = irBuilder->CreateShl(baseAddressInOwords, ConstantInt::get(baseAddressInOwords->getType(), 0));
1856+
wiAns->incUpdateDepend(baseAddressInOwords, baseInBytesDep);
1857+
Value* newVal = irBuilder->CreateShl(baseAddressInOwords, ConstantInt::get(baseAddressInOwords->getType(), 4));
1858+
wiAns->incUpdateDepend(newVal, baseInBytesDep);
1859+
baseInBytes->replaceAllUsesWith(newVal);
1860+
}
1861+
1862+
chunkBaseAddress = baseAddressInOwords;
18531863
}
18541864
BufChunk* cov_chunk = nullptr;
18551865
for (std::vector<BufChunk*>::reverse_iterator rit = chunk_vec.rbegin(),
@@ -1858,7 +1868,7 @@ void ConstantCoalescing::ScatterToSampler(
18581868
BufChunk* cur_chunk = *rit;
18591869
// Look for an existing sampler load covering data range of the input load.
18601870
if (CompareBufferBase(cur_chunk->bufIdxV, cur_chunk->addrSpace, bufIdxV, addrSpace) &&
1861-
cur_chunk->baseIdxV == baseAddressInOwords)
1871+
cur_chunk->baseIdxV == chunkBaseAddress)
18621872
{
18631873
const uint chunkStartInBytes = cur_chunk->chunkStart * cur_chunk->elementSize;
18641874
const uint chunkEndInBytes = (cur_chunk->chunkStart + cur_chunk->chunkSize) * cur_chunk->elementSize;
@@ -1877,23 +1887,32 @@ void ConstantCoalescing::ScatterToSampler(
18771887
cov_chunk = new BufChunk();
18781888
cov_chunk->bufIdxV = bufIdxV;
18791889
cov_chunk->addrSpace = addrSpace;
1880-
cov_chunk->baseIdxV = baseAddressInOwords;
1881-
cov_chunk->elementSize = samplerElementSizeInBytes;
1882-
cov_chunk->chunkStart = iSTD::RoundDown((offsetInBytes / samplerElementSizeInBytes), samplerLoadSizeInDwords);
1883-
cov_chunk->chunkSize = samplerLoadSizeInDwords;
1890+
cov_chunk->baseIdxV = chunkBaseAddress;
1891+
cov_chunk->elementSize = samplerElementSizeInBytes; // 4 bytes
1892+
cov_chunk->chunkStart = iSTD::RoundDown((offsetInBytes / samplerElementSizeInBytes), samplerLoadSizeInDwords); // in DWORDS aligned to OWORDs
1893+
cov_chunk->chunkSize = samplerLoadSizeInDwords; // in DWORDs
1894+
1895+
Value* dataAddress = chunkBaseAddress;
18841896
if (offsetInBytes >= samplerLoadSizeInBytes)
18851897
{
1886-
baseAddressInOwords = irBuilder->CreateAdd(baseAddressInOwords, ConstantInt::get(baseAddressInOwords->getType(), (offsetInBytes / samplerLoadSizeInBytes)));
1887-
wiAns->incUpdateDepend(baseAddressInOwords, WIAnalysis::RANDOM);
1898+
const uint32_t chunkOffset = (useByteAddress) ?
1899+
(cov_chunk->chunkStart * cov_chunk->elementSize) : // in bytes
1900+
(offsetInBytes / samplerLoadSizeInBytes); //in OWORDs
1901+
dataAddress = irBuilder->CreateAdd(dataAddress, ConstantInt::get(dataAddress->getType(), chunkOffset));
1902+
wiAns->incUpdateDepend(dataAddress, WIAnalysis::RANDOM);
18881903
}
1889-
if (baseAddressInOwords->getType()->getIntegerBitWidth() >= 32)
1904+
if (dataAddress->getType()->getIntegerBitWidth() >= 32 && !useByteAddress)
18901905
{
1891-
baseAddressInOwords = irBuilder->CreateAnd(baseAddressInOwords, ConstantInt::get(baseAddressInOwords->getType(), 0x0FFFFFFF));
1892-
wiAns->incUpdateDepend(baseAddressInOwords, WIAnalysis::RANDOM);
1906+
dataAddress = irBuilder->CreateAnd(dataAddress, ConstantInt::get(dataAddress->getType(), 0x0FFFFFFF));
1907+
wiAns->incUpdateDepend(dataAddress, WIAnalysis::RANDOM);
18931908
}
1894-
baseAddressInOwords = irBuilder->CreateZExtOrTrunc(baseAddressInOwords, irBuilder->getInt32Ty());
1895-
wiAns->incUpdateDepend(baseAddressInOwords, WIAnalysis::RANDOM);
1896-
ld = CreateSamplerLoad(baseAddressInOwords, addrSpace);
1909+
if (dataAddress->getType() != irBuilder->getInt32Ty())
1910+
{
1911+
dataAddress = irBuilder->CreateZExtOrTrunc(dataAddress, irBuilder->getInt32Ty());
1912+
wiAns->incUpdateDepend(dataAddress, WIAnalysis::RANDOM);
1913+
}
1914+
1915+
ld = CreateSamplerLoad(dataAddress, addrSpace);
18971916
cov_chunk->chunkIO = ld;
18981917
chunk_vec.push_back(cov_chunk);
18991918
}

IGC/Compiler/CISACodeGen/DriverInfo.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ namespace IGC
7474
/// The driver uses typed or untyped constant buffers (for ld_raw vs sampler)
7575
virtual bool UsesTypedConstantBuffers3D() const { return true; }
7676

77+
/// The driver uses typed constant buffers whose sampler loads take byte addresses instead of addresses in 16-byte (OWORD) units.
78+
virtual bool UsesTypedConstantBuffersWithByteAddress() const { return false; }
79+
7780
/// The driver uses typed or untyped constant buffers (for ld_raw vs sampler)
7881
virtual bool UsesTypedConstantBuffersGPGPU() const { return true; }
7982

0 commit comments

Comments
 (0)