@@ -598,6 +598,21 @@ bool VectorPreProcess::splitStore(
598
598
if (IGC_IS_FLAG_ENABLED (EnableSplitUnalignedVector))
599
599
{
600
600
// byte and word-aligned stores can only store a dword at a time.
601
+ unsigned int alignment = ASI.getAlignment ();
602
+ if (isStoreInst && alignment < 4 )
603
+ {
604
+ uint32_t newAlign = getKnownAlignment (ASI.getPointerOperand (), *m_DL);
605
+ if (newAlign > alignment)
606
+ {
607
+ // For the same reason as Load, use DW-aligned for OCL stateful.
608
+ StoreInst* aSI = dyn_cast<StoreInst>(SI);
609
+ if (aSI && newAlign > 4 && isStatefulAddrSpace (aSI->getPointerAddressSpace ()))
610
+ {
611
+ newAlign = 4 ;
612
+ }
613
+ ASI.setAlignment (newAlign);
614
+ }
615
+ }
601
616
bool needsDWordSplit =
602
617
(!isStoreInst ||
603
618
m_CGCtx->m_DriverInfo .splitUnalignedVectors () ||
@@ -753,6 +768,28 @@ bool VectorPreProcess::splitLoad(
753
768
if (IGC_IS_FLAG_ENABLED (EnableSplitUnalignedVector))
754
769
{
755
770
// byte and word-aligned loads can only load a dword at a time.
771
+ unsigned int alignment = ALI.getAlignment ();
772
+ if (!isLdRaw && alignment < 4 )
773
+ {
774
+ uint32_t newAlign = getKnownAlignment (ALI.getPointerOperand (), *m_DL);
775
+ if (newAlign > alignment)
776
+ {
777
+ // For OCL stateful, the base can be as little as DW-aligned. To be safe,
778
+ // need to use DW-aligned. For example,
779
+ // %0 = add i32 0, 16
779
+ // %4 = inttoptr i32 %0 to <8 x i16> addrspace(131073) *
781
+ // %5 = load <8 x i16>, <8 x i16> addrspace(131073) * %4, align 2
782
+ // newAlign from getKnownAlignment() is 16. But we can only set align to 4 as
783
+ // the base of this stateful could be just DW-aligned.
784
+ LoadInst* aLI = dyn_cast<LoadInst>(LI);
785
+ if (aLI && newAlign > 4 && isStatefulAddrSpace (aLI->getPointerAddressSpace ()))
786
+ {
787
+ newAlign = 4 ;
788
+ }
789
+ ALI.setAlignment (newAlign);
790
+ }
791
+ }
792
+
756
793
if ((isLdRaw || !WI.isUniform (ALI.getInst ())) && ALI.getAlignment () < 4 )
757
794
splitSize = 4 ;
758
795
}
@@ -1540,4 +1577,4 @@ bool VectorPreProcess::runOnFunction(Function& F)
1540
1577
m_WorkList.clear ();
1541
1578
}
1542
1579
return changed;
1543
- }
1580
+ }
0 commit comments