Skip to content

Commit 1b0e95a

Browse files
jgu222sys_zuul
authored and committed
Allow alignment improvement for all cases. Stateful accesses are the exception: the base
of a stateful surface may be only DW-aligned, so the improved alignment is capped at 4 for stateful messages. Change-Id: Id264f87dff73da7c73d31cd4ba7bed64711f17f8
1 parent 15584d1 commit 1b0e95a

File tree

3 files changed

+47
-1
lines changed

3 files changed

+47
-1
lines changed

IGC/Compiler/CISACodeGen/VectorPreProcess.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,21 @@ bool VectorPreProcess::splitStore(
598598
if (IGC_IS_FLAG_ENABLED(EnableSplitUnalignedVector))
599599
{
600600
// byte and word-aligned stores can only store a dword at a time.
601+
unsigned int alignment = ASI.getAlignment();
602+
if (isStoreInst && alignment < 4)
603+
{
604+
uint32_t newAlign = getKnownAlignment(ASI.getPointerOperand(), *m_DL);
605+
if (newAlign > alignment)
606+
{
607+
// For the same reason as Load, use DW-aligned for OCL stateful.
608+
StoreInst* aSI = dyn_cast<StoreInst>(SI);
609+
if (aSI && newAlign > 4 && isStatefulAddrSpace(aSI->getPointerAddressSpace()))
610+
{
611+
newAlign = 4;
612+
}
613+
ASI.setAlignment(newAlign);
614+
}
615+
}
601616
bool needsDWordSplit =
602617
(!isStoreInst ||
603618
m_CGCtx->m_DriverInfo.splitUnalignedVectors() ||
@@ -753,6 +768,28 @@ bool VectorPreProcess::splitLoad(
753768
if (IGC_IS_FLAG_ENABLED(EnableSplitUnalignedVector))
754769
{
755770
// byte and word-aligned loads can only load a dword at a time.
771+
unsigned int alignment = ALI.getAlignment();
772+
if (!isLdRaw && alignment < 4)
773+
{
774+
uint32_t newAlign = getKnownAlignment(ALI.getPointerOperand(), *m_DL);
775+
if (newAlign > alignment)
776+
{
777+
// For OCL stateful, the base can be as little as DW-aligned. To be safe,
778+
// need to use DW-aligned. For example,
779+
// %0 = add i32 0, 16
779+
// %4 = inttoptr i32 %0 to <8 x i16> addrspace(131073)*
780+
// %5 = load <8 x i16>, <8 x i16> addrspace(131073)* %4, align 2
782+
// newAlign from getKnownAlignment() is 16. But we can only set align to 4 as
783+
// the base of this stateful could be just DW-aligned.
784+
LoadInst* aLI = dyn_cast<LoadInst>(LI);
785+
if (aLI && newAlign > 4 && isStatefulAddrSpace(aLI->getPointerAddressSpace()))
786+
{
787+
newAlign = 4;
788+
}
789+
ALI.setAlignment(newAlign);
790+
}
791+
}
792+
756793
if ((isLdRaw || !WI.isUniform(ALI.getInst())) && ALI.getAlignment() < 4)
757794
splitSize = 4;
758795
}
@@ -1540,4 +1577,4 @@ bool VectorPreProcess::runOnFunction(Function& F)
15401577
m_WorkList.clear();
15411578
}
15421579
return changed;
1543-
}
1580+
}

IGC/Compiler/CISACodeGen/helper.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,14 @@ namespace IGC
100100
return temp.u32Val;
101101
}
102102

103+
// Returns true if the address space AS denotes a stateful surface.
104+
// A stateful surface has an encoded AS that is strictly greater than
105+
// ADDRESS_SPACE_NUM_ADDRESSES; an AS equal to that value yields false.
106+
bool isStatefulAddrSpace(unsigned AS)
107+
{
108+
return AS > ADDRESS_SPACE_NUM_ADDRESSES;
109+
}
110+
103111
bool isDummyBasicBlock(llvm::BasicBlock* BB)
104112
{
105113
if (BB->size() != 1)

IGC/Compiler/CISACodeGen/helper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ namespace IGC
183183
unsigned uniqueIndAS);
184184

185185
unsigned SetBufferAsBindless(unsigned addressSpaceOfPtr, BufferType bufferType);
186+
bool isStatefulAddrSpace(unsigned AS);
186187

187188
BufferType DecodeAS4GFXResource(unsigned addrSpace, bool& directIdx, unsigned& bufId);
188189
int getConstantBufferLoadOffset(llvm::LoadInst* ld);

0 commit comments

Comments
 (0)