@@ -1816,6 +1816,8 @@ void ConstantCoalescing::ScatterToSampler(
1816
1816
1817
1817
assert (!load->getType ()->isVectorTy () || load->getType ()->getVectorNumElements () <= 4 );
1818
1818
1819
+ const bool useByteAddress = m_ctx->m_DriverInfo .UsesTypedConstantBuffersWithByteAddress ();
1820
+
1819
1821
// Code below doesn't support crossing a 4-DWORD boundary, i.e. mapping a
1820
1822
// single input load to multiple sampler loads.
1821
1823
const bool canBeLoadedUsingSampler =
@@ -1830,26 +1832,34 @@ void ConstantCoalescing::ScatterToSampler(
1830
1832
1831
1833
WIAnalysis::WIDependancy baseInBytesDep = wiAns->whichDepend (baseInBytes);
1832
1834
1833
- Value* baseAddressInOwords = GetSamplerAlignedAddress (baseInBytes);
1834
- assert (baseAddressInOwords);
1835
-
1836
- // it is possible that baseInBytes is uniform, yet load is non-uniform due to the use location of load
1837
- if (baseAddressInOwords != baseInBytes->getOperand (0 ))
1838
- {
1839
- Value* newVal = irBuilder->CreateShl (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), 4 ));
1840
- wiAns->incUpdateDepend (newVal, baseInBytesDep);
1841
- baseInBytes->replaceAllUsesWith (newVal);
1842
- }
1843
- else if (wiAns->whichDepend (baseAddressInOwords) != baseInBytesDep)
1835
+ // Data address for sampler load, either in OWORDs or in bytes
1836
+ Value* chunkBaseAddress = baseInBytes;
1837
+ if (!useByteAddress)
1844
1838
{
1845
- // quick fix for a special case: baseAddressInOwords is uniform and baseInBytes is not uniform.
1846
- // If we use baseInBytes-src0 (elementIndx) directly at cf-join point by this transform,
1847
- // we can change the uniformness of baseAddressInOwords
1848
- baseAddressInOwords = irBuilder->CreateShl (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), 0 ));
1849
- wiAns->incUpdateDepend (baseAddressInOwords, baseInBytesDep);
1850
- Value* newVal = irBuilder->CreateShl (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), 4 ));
1851
- wiAns->incUpdateDepend (newVal, baseInBytesDep);
1852
- baseInBytes->replaceAllUsesWith (newVal);
1839
+ // base address is in OWORDs
1840
+ Value* baseAddressInOwords = GetSamplerAlignedAddress (baseInBytes);
1841
+ assert (baseAddressInOwords);
1842
+
1843
+ // baseInBytes may be uniform while the load itself is non-uniform because of where the load is used
1844
+ if (baseAddressInOwords != baseInBytes->getOperand (0 ))
1845
+ {
1846
+ Value* newVal = irBuilder->CreateShl (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), 4 ));
1847
+ wiAns->incUpdateDepend (newVal, baseInBytesDep);
1848
+ baseInBytes->replaceAllUsesWith (newVal);
1849
+ }
1850
+ else if (wiAns->whichDepend (baseAddressInOwords) != baseInBytesDep)
1851
+ {
1852
+ // Workaround for a special case: baseAddressInOwords is uniform but baseInBytes is not.
1853
+ // If this transform used baseInBytes' src0 (elementIndx) directly at a control-flow join point,
1854
+ // it could change the uniformity of baseAddressInOwords.
1855
+ baseAddressInOwords = irBuilder->CreateShl (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), 0 ));
1856
+ wiAns->incUpdateDepend (baseAddressInOwords, baseInBytesDep);
1857
+ Value* newVal = irBuilder->CreateShl (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), 4 ));
1858
+ wiAns->incUpdateDepend (newVal, baseInBytesDep);
1859
+ baseInBytes->replaceAllUsesWith (newVal);
1860
+ }
1861
+
1862
+ chunkBaseAddress = baseAddressInOwords;
1853
1863
}
1854
1864
BufChunk* cov_chunk = nullptr ;
1855
1865
for (std::vector<BufChunk*>::reverse_iterator rit = chunk_vec.rbegin (),
@@ -1858,7 +1868,7 @@ void ConstantCoalescing::ScatterToSampler(
1858
1868
BufChunk* cur_chunk = *rit;
1859
1869
// Look for an existing sampler load covering data range of the input load.
1860
1870
if (CompareBufferBase (cur_chunk->bufIdxV , cur_chunk->addrSpace , bufIdxV, addrSpace) &&
1861
- cur_chunk->baseIdxV == baseAddressInOwords )
1871
+ cur_chunk->baseIdxV == chunkBaseAddress )
1862
1872
{
1863
1873
const uint chunkStartInBytes = cur_chunk->chunkStart * cur_chunk->elementSize ;
1864
1874
const uint chunkEndInBytes = (cur_chunk->chunkStart + cur_chunk->chunkSize ) * cur_chunk->elementSize ;
@@ -1877,23 +1887,32 @@ void ConstantCoalescing::ScatterToSampler(
1877
1887
cov_chunk = new BufChunk ();
1878
1888
cov_chunk->bufIdxV = bufIdxV;
1879
1889
cov_chunk->addrSpace = addrSpace;
1880
- cov_chunk->baseIdxV = baseAddressInOwords;
1881
- cov_chunk->elementSize = samplerElementSizeInBytes;
1882
- cov_chunk->chunkStart = iSTD::RoundDown ((offsetInBytes / samplerElementSizeInBytes), samplerLoadSizeInDwords);
1883
- cov_chunk->chunkSize = samplerLoadSizeInDwords;
1890
+ cov_chunk->baseIdxV = chunkBaseAddress;
1891
+ cov_chunk->elementSize = samplerElementSizeInBytes; // 4 bytes
1892
+ cov_chunk->chunkStart = iSTD::RoundDown ((offsetInBytes / samplerElementSizeInBytes), samplerLoadSizeInDwords); // in DWORDS aligned to OWORDs
1893
+ cov_chunk->chunkSize = samplerLoadSizeInDwords; // in DWORDs
1894
+
1895
+ Value* dataAddress = chunkBaseAddress;
1884
1896
if (offsetInBytes >= samplerLoadSizeInBytes)
1885
1897
{
1886
- baseAddressInOwords = irBuilder->CreateAdd (baseAddressInOwords, ConstantInt::get (baseAddressInOwords->getType (), (offsetInBytes / samplerLoadSizeInBytes)));
1887
- wiAns->incUpdateDepend (baseAddressInOwords, WIAnalysis::RANDOM);
1898
+ const uint32_t chunkOffset = (useByteAddress) ?
1899
+ (cov_chunk->chunkStart * cov_chunk->elementSize ) : // in bytes
1900
+ (offsetInBytes / samplerLoadSizeInBytes); // in OWORDs
1901
+ dataAddress = irBuilder->CreateAdd (dataAddress, ConstantInt::get (dataAddress->getType (), chunkOffset));
1902
+ wiAns->incUpdateDepend (dataAddress, WIAnalysis::RANDOM);
1888
1903
}
1889
- if (baseAddressInOwords ->getType ()->getIntegerBitWidth () >= 32 )
1904
+ if (dataAddress ->getType ()->getIntegerBitWidth () >= 32 && !useByteAddress )
1890
1905
{
1891
- baseAddressInOwords = irBuilder->CreateAnd (baseAddressInOwords , ConstantInt::get (baseAddressInOwords ->getType (), 0x0FFFFFFF ));
1892
- wiAns->incUpdateDepend (baseAddressInOwords , WIAnalysis::RANDOM);
1906
+ dataAddress = irBuilder->CreateAnd (dataAddress , ConstantInt::get (dataAddress ->getType (), 0x0FFFFFFF ));
1907
+ wiAns->incUpdateDepend (dataAddress , WIAnalysis::RANDOM);
1893
1908
}
1894
- baseAddressInOwords = irBuilder->CreateZExtOrTrunc (baseAddressInOwords, irBuilder->getInt32Ty ());
1895
- wiAns->incUpdateDepend (baseAddressInOwords, WIAnalysis::RANDOM);
1896
- ld = CreateSamplerLoad (baseAddressInOwords, addrSpace);
1909
+ if (dataAddress->getType () != irBuilder->getInt32Ty ())
1910
+ {
1911
+ dataAddress = irBuilder->CreateZExtOrTrunc (dataAddress, irBuilder->getInt32Ty ());
1912
+ wiAns->incUpdateDepend (dataAddress, WIAnalysis::RANDOM);
1913
+ }
1914
+
1915
+ ld = CreateSamplerLoad (dataAddress, addrSpace);
1897
1916
cov_chunk->chunkIO = ld;
1898
1917
chunk_vec.push_back (cov_chunk);
1899
1918
}
0 commit comments