Skip to content

Commit 65f49f8

Browse files
vsemenov368igcbot
authored and committed
Enable internal LSC typed 2D intrinsics
.
1 parent 9dda44d commit 65f49f8

File tree

12 files changed

+210
-82
lines changed

12 files changed

+210
-82
lines changed

IGC/VectorCompiler/include/vc/InternalIntrinsics/Intrinsic_definitions.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -839,6 +839,41 @@
839839
],
840840
"attributes" : "WriteMem", },
841841

842+
## ``llvm.vc.internal.lsc.*2d.typed.bti.*`` : LSC typed 2d block intrinsics
843+
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
844+
## * arg0: vNi8, Cache controls, where N is the number of supported cache levels [MBC]
845+
## * arg1: i32, Surface
846+
## * arg2: i32, Block height [MBC]
847+
## * arg3: i32, Block width (in elements) [MBC]
848+
## * arg4: i32, Memory block X position (in bytes)
849+
## * arg5: i32, Memory block Y position
850+
## * arg6: data to write (store only)
851+
##
852+
## * Return value: the value read or void
853+
##
854+
"lsc_load_2d_tgm_bti" : { "result" : "anyvector",
855+
"arguments" : [
856+
"anyvector", # cache controls
857+
"int", # i32 BTI
858+
"int", # block height
859+
"int", # block width
860+
"int", # X offset
861+
"int" # Y offset
862+
],
863+
"attributes" : "ReadMem", },
864+
"lsc_store_2d_tgm_bti" : { "result" : "void",
865+
"arguments" : [
866+
"anyvector", # cache controls
867+
"int", # i32 BTI
868+
"int", # block height
869+
"int", # block width
870+
"int", # X offset
871+
"int", # Y offset
872+
"anyvector"
873+
],
874+
"attributes" : "WriteMem", },
875+
876+
842877
## ``llvm.vc.internal.lsc.*.quad.tgm`` : Typed LSC load intrinsic
843878
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
844879
## * arg0: vNi1 Predicate (overloaded)

IGC/VectorCompiler/lib/GenXCodeGen/GenXCisaBuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3826,7 +3826,7 @@ void GenXKernelBuilder::buildIntrinsic(CallInst *CI, unsigned IntrinID,
38263826
// work around VISA spec peculiarity: for typed messages width is in bytes
38273827
// not in elements
38283828
VectorType *VT;
3829-
constexpr int SrcOperandNum = 7; // to be in sync with json
3829+
constexpr int SrcOperandNum = 6; // to be in sync with json
38303830
switch (SubOpcode) {
38313831
case LSC_LOAD_BLOCK2D:
38323832
VT = cast<VectorType>(CI->getType());

IGC/VectorCompiler/lib/GenXCodeGen/GenXLegacyToLscTranslator.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -810,8 +810,8 @@ Value *GenXLegacyToLscTranslator::translateMediaLoadStore(CallInst &CI) const {
810810
IGC_ASSERT(IID == GenXIntrinsic::genx_media_ld ||
811811
IID == GenXIntrinsic::genx_media_st);
812812
auto IsLoad = IID == GenXIntrinsic::genx_media_ld;
813-
auto NewIID = IsLoad ? GenXIntrinsic::genx_lsc_load2d_typed_bti
814-
: GenXIntrinsic::genx_lsc_store2d_typed_bti;
813+
auto NewIID = IsLoad ? vc::InternalIntrinsic::lsc_load_2d_tgm_bti
814+
: vc::InternalIntrinsic::lsc_store_2d_tgm_bti;
815815

816816
auto *Modifier = cast<ConstantInt>(CI.getArgOperand(0));
817817
auto *BTI = CI.getArgOperand(1);
@@ -853,9 +853,18 @@ Value *GenXLegacyToLscTranslator::translateMediaLoadStore(CallInst &CI) const {
853853
IGC_ASSERT(Width % ESize == 0);
854854
IGC_ASSERT(DataSize % RoundedWidth == 0);
855855

856+
auto *CacheOpts = ConstantDataVector::getSplat(
857+
ST->getNumCacheLevels(), Builder.getInt8(LSC_CACHING_DEFAULT));
858+
859+
SmallVector<Type *, 2> Types;
860+
if (IsLoad)
861+
Types.push_back(VTy);
862+
Types.push_back(CacheOpts->getType());
863+
if (!IsLoad)
864+
Types.push_back(VTy);
865+
856866
SmallVector<Value *, 8> Args = {
857-
Builder.getInt8(0), // L1 cache control (default)
858-
Builder.getInt8(0), // L3 cache control (default)
867+
CacheOpts,
859868
BTI,
860869
Builder.getInt32(Height),
861870
Builder.getInt32(Width / ESize),
@@ -865,7 +874,8 @@ Value *GenXLegacyToLscTranslator::translateMediaLoadStore(CallInst &CI) const {
865874
if (!IsLoad)
866875
Args.push_back(Data);
867876

868-
auto *Func = GenXIntrinsic::getGenXDeclaration(CI.getModule(), NewIID, {VTy});
877+
auto *Func = vc::InternalIntrinsic::getInternalDeclaration(CI.getModule(),
878+
NewIID, Types);
869879
auto *I = Builder.CreateCall(Func, Args);
870880
LLVM_DEBUG(dbgs() << "New intrinsic generated: " << *I);
871881
return I;

IGC/VectorCompiler/lib/GenXCodeGen/GenXLowering.cpp

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1795,19 +1795,19 @@ bool GenXLowering::widenSIMD8GatherScatter(CallInst *CI, unsigned IID) {
17951795
* lowerLSCTyped2DBlock : handle padding for the typed 2d block messages
17961796
*/
17971797
bool GenXLowering::lowerLSCTyped2DBlock(CallInst *CI, unsigned IID) {
1798-
IGC_ASSERT(IID == GenXIntrinsic::genx_lsc_load2d_typed_bti ||
1799-
IID == GenXIntrinsic::genx_lsc_store2d_typed_bti);
1800-
1801-
auto *L1V = CI->getOperand(0);
1802-
auto *L3V = CI->getOperand(1);
1803-
auto *BTIV = CI->getOperand(2);
1804-
auto *HeightV = cast<ConstantInt>(CI->getOperand(3));
1805-
auto *WidthV = cast<ConstantInt>(CI->getOperand(4));
1806-
auto *XOffV = CI->getOperand(5);
1807-
auto *YOffV = CI->getOperand(6);
1798+
IGC_ASSERT(IID == vc::InternalIntrinsic::lsc_load_2d_tgm_bti ||
1799+
IID == vc::InternalIntrinsic::lsc_store_2d_tgm_bti);
1800+
1801+
auto *CacheOpts = CI->getOperand(0);
1802+
auto *BTIV = CI->getOperand(1);
1803+
auto *HeightV = cast<ConstantInt>(CI->getOperand(2));
1804+
auto *WidthV = cast<ConstantInt>(CI->getOperand(3));
1805+
auto *XOffV = CI->getOperand(4);
1806+
auto *YOffV = CI->getOperand(5);
18081807
Value *StoreDataV = nullptr;
1809-
if (IID == GenXIntrinsic::genx_lsc_store2d_typed_bti)
1810-
StoreDataV = CI->getOperand(7);
1808+
bool IsStore = IID == vc::InternalIntrinsic::lsc_store_2d_tgm_bti;
1809+
if (IsStore)
1810+
StoreDataV = CI->getOperand(6);
18111811

18121812
auto *DataTy = StoreDataV ? StoreDataV->getType() : CI->getType();
18131813
auto *VTy = cast<IGCLLVM::FixedVectorType>(DataTy);
@@ -1838,9 +1838,15 @@ bool GenXLowering::lowerLSCTyped2DBlock(CallInst *CI, unsigned IID) {
18381838
return false;
18391839

18401840
auto *TargetVTy = IGCLLVM::FixedVectorType::get(ElementTy, TargetElements);
1841-
auto IntrinsicID = static_cast<GenXIntrinsic::ID>(IID);
1842-
auto *Decl = GenXIntrinsic::getGenXDeclaration(CI->getModule(), IntrinsicID,
1843-
{TargetVTy});
1841+
1842+
SmallVector<Type *, 2> Types;
1843+
if (!IsStore)
1844+
Types.push_back(TargetVTy);
1845+
Types.push_back(CacheOpts->getType());
1846+
if (IsStore)
1847+
Types.push_back(TargetVTy);
1848+
1849+
auto *Decl = vc::getAnyDeclaration(CI->getModule(), IID, Types);
18441850

18451851
vc::CMRegion R(ElementTy);
18461852
R.NumElements = NElements;
@@ -1855,11 +1861,11 @@ bool GenXLowering::lowerLSCTyped2DBlock(CallInst *CI, unsigned IID) {
18551861
R.VStride = 0;
18561862
}
18571863

1858-
SmallVector<Value *, 8> Args = {L1V, L3V, BTIV, HeightV,
1859-
WidthV, XOffV, YOffV};
1864+
SmallVector<Value *, 7> Args = {CacheOpts, BTIV, HeightV,
1865+
WidthV, XOffV, YOffV};
18601866

18611867
switch (IID) {
1862-
case GenXIntrinsic::genx_lsc_load2d_typed_bti: {
1868+
case vc::InternalIntrinsic::lsc_load_2d_tgm_bti: {
18631869
auto *NewLoad = CallInst::Create(
18641870
Decl, Args, CI->getName() + VALUE_NAME(".padding"), CI);
18651871
NewLoad->setDebugLoc(CI->getDebugLoc());
@@ -1868,7 +1874,7 @@ bool GenXLowering::lowerLSCTyped2DBlock(CallInst *CI, unsigned IID) {
18681874
CI->getDebugLoc());
18691875
CI->replaceAllUsesWith(RdRgn);
18701876
} break;
1871-
case GenXIntrinsic::genx_lsc_store2d_typed_bti: {
1877+
case vc::InternalIntrinsic::lsc_store_2d_tgm_bti: {
18721878
IGC_ASSERT_EXIT(StoreDataV);
18731879
auto *WrRgn = R.createWrRegion(UndefValue::get(TargetVTy), StoreDataV,
18741880
StoreDataV->getName() + ".wrregion", CI,
@@ -2059,8 +2065,8 @@ bool GenXLowering::processInst(Instruction *Inst) {
20592065
ToErase.push_back(Inst);
20602066
return true;
20612067
}
2062-
case GenXIntrinsic::genx_lsc_load2d_typed_bti:
2063-
case GenXIntrinsic::genx_lsc_store2d_typed_bti:
2068+
case vc::InternalIntrinsic::lsc_load_2d_tgm_bti:
2069+
case vc::InternalIntrinsic::lsc_store_2d_tgm_bti:
20642070
return lowerLSCTyped2DBlock(CI, IntrinsicID);
20652071
case GenXIntrinsic::genx_ssmul:
20662072
case GenXIntrinsic::genx_sumul:

IGC/VectorCompiler/lib/GenXCodeGen/Utils/cisa_gen_intrinsics.json

Lines changed: 29 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,34 @@
882882
"OffsetY": ["INT", 8],
883883
"Src": ["RAW", 9]
884884
},
885+
"vc::InternalIntrinsic::lsc_load_2d_tgm_bti" : {
886+
"opc": "ISA_LSC_TYPED",
887+
"gen_opc": "ISA_LSC_TYPED_2D",
888+
"sub_opc": [ "LITERAL", "LSC_LOAD_BLOCK2D" ],
889+
"address_model": [ "LITERAL", "LSC_ADDR_TYPE_BTI" ],
890+
"Dst": [ "RAW", 0 ],
891+
"CacheOpts": ["CACHEOPTS", 1],
892+
"Surface": [ "GENERAL", "UNSIGNED", 2 ],
893+
"BlockHeight": [ "INT", 3 ],
894+
"BlockWidth": [ "INT", 4 ],
895+
"XOff": [ "GENERAL", "UNSIGNED", 5 ],
896+
"YOff": [ "GENERAL", "UNSIGNED", 6 ],
897+
"Src": [ "NULLRAW" ]
898+
},
899+
"vc::InternalIntrinsic::lsc_store_2d_tgm_bti" : {
900+
"opc": "ISA_LSC_TYPED",
901+
"gen_opc": "ISA_LSC_TYPED_2D",
902+
"sub_opc": [ "LITERAL", "LSC_STORE_BLOCK2D" ],
903+
"address_model": [ "LITERAL", "LSC_ADDR_TYPE_BTI" ],
904+
"Dst": [ "NULLRAW" ],
905+
"CacheOpts": ["CACHEOPTS", 1],
906+
"Surface": [ "GENERAL", "UNSIGNED", 2 ],
907+
"BlockHeight": [ "INT", 3 ],
908+
"BlockWidth": [ "INT", 4 ],
909+
"XOff": [ "GENERAL", "UNSIGNED", 5 ],
910+
"YOff": [ "GENERAL", "UNSIGNED", 6 ],
911+
"Src": [ "RAW", 7 ]
912+
},
885913
// vc-intrinsics
886914
"genx_fptosi_sat": {
887915
"opc": "ISA_MOV",
@@ -3818,36 +3846,6 @@
38183846
"barrier_id": [ "GENERAL", 2 ],
38193847
"thread_count": [ "GENERAL", 3 ]
38203848
},
3821-
"genx_lsc_load2d_typed_bti" : {
3822-
"opc": "ISA_LSC_TYPED",
3823-
"gen_opc": "ISA_LSC_TYPED_2D",
3824-
"sub_opc": [ "LITERAL", "LSC_LOAD_BLOCK2D" ],
3825-
"address_model": [ "LITERAL", "LSC_ADDR_TYPE_BTI" ],
3826-
"Dst": [ "RAW", 0 ],
3827-
"L1_Hints": [ "BYTE", 1 ],
3828-
"L3_Hints": [ "BYTE", 2 ],
3829-
"Surface": [ "GENERAL", "UNSIGNED", 3 ],
3830-
"BlockHeight": [ "BYTE", 4 ],
3831-
"BlockWidth": [ "BYTE", 5 ],
3832-
"XOff": [ "GENERAL", "UNSIGNED", 6 ],
3833-
"YOff": [ "GENERAL", "UNSIGNED", 7 ],
3834-
"Src": [ "NULLRAW" ]
3835-
},
3836-
"genx_lsc_store2d_typed_bti" : {
3837-
"opc": "ISA_LSC_TYPED",
3838-
"gen_opc": "ISA_LSC_TYPED_2D",
3839-
"sub_opc": [ "LITERAL", "LSC_STORE_BLOCK2D" ],
3840-
"address_model": [ "LITERAL", "LSC_ADDR_TYPE_BTI" ],
3841-
"Dst": [ "NULLRAW" ],
3842-
"L1_Hints": [ "BYTE", 1 ],
3843-
"L3_Hints": [ "BYTE", 2 ],
3844-
"Surface": [ "GENERAL", "UNSIGNED", 3 ],
3845-
"BlockHeight": [ "BYTE", 4 ],
3846-
"BlockWidth": [ "BYTE", 5 ],
3847-
"XOff": [ "GENERAL", "UNSIGNED", 6 ],
3848-
"YOff": [ "GENERAL", "UNSIGNED", 7 ],
3849-
"Src": [ "RAW", 8 ]
3850-
},
38513849
"genx_lsc_fence": {
38523850
"opc": "ISA_LSC_FENCE",
38533851
"exec_size": [ "EXECSIZE_FROM_ARG", 1 ],
@@ -3994,12 +3992,11 @@
39943992
],
39953993
"ISA_LSC_TYPED_2D": [
39963994
"LSC_DATA_SHAPE_TYPED_BLOCK2D dataShape2D",
3997-
"LSC_CACHE_OPTS cache {(LSC_CACHE_OPT) L1_Hints, (LSC_CACHE_OPT) L3_Hints}",
39983995
"dataShape2D.height = BlockHeight",
39993996
"dataShape2D.width = BlockWidth",
40003997
[ "CreateLscTyped2D",
40013998
"sub_opc",
4002-
"cache",
3999+
"CacheOpts",
40034000
"address_model",
40044001
"Surface",
40054002
"dataShape2D",

IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXTranslateIntrinsics.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class GenXTranslateIntrinsics final
5959
Value *translateLscLoadStoreBlock2D(CallInst &I) const;
6060
Value *translateLscLoadStore2DDesc(CallInst &I) const;
6161
Value *translateLscTyped(CallInst &I) const;
62+
Value *translateLscTyped2D(CallInst &I) const;
6263
};
6364
} // namespace
6465

@@ -157,6 +158,11 @@ void GenXTranslateIntrinsics::visitCallInst(CallInst &I) const {
157158
case GenXIntrinsic::genx_lsc_store_2d_ugm_desc:
158159
NewI = translateLscLoadStore2DDesc(I);
159160
break;
161+
case GenXIntrinsic::genx_lsc_load2d_typed_bti:
162+
case GenXIntrinsic::genx_lsc_store2d_typed_bti:
163+
case GenXIntrinsic::genx_lsc_prefetch2d_typed_bti:
164+
NewI = translateLscTyped2D(I);
165+
break;
160166
case GenXIntrinsic::genx_lsc_load_merge_quad_typed_bti:
161167
case GenXIntrinsic::genx_lsc_prefetch_quad_typed_bti:
162168
case GenXIntrinsic::genx_lsc_store_quad_typed_bti:
@@ -653,3 +659,63 @@ Value *GenXTranslateIntrinsics::translateLscTyped(CallInst &I) const {
653659

654660
return NewI;
655661
}
662+
663+
/// Translate a legacy ``genx_lsc_{load,store}2d_typed_bti`` call into the
/// corresponding ``vc::InternalIntrinsic::lsc_{load,store}_2d_tgm_bti`` call.
///
/// The two separate L1/L3 cache-control operands of the legacy intrinsic are
/// merged into a single cache-controls operand by translateCacheControls();
/// the surface, block shape and X/Y offsets are forwarded unchanged.
///
/// Operand layout of the internal intrinsic produced here:
///   arg0 cache controls, arg1 BTI, arg2 block height, arg3 block width,
///   arg4 X offset, arg5 Y offset, arg6 data to write (store only).
///
/// \param I  the legacy intrinsic call; the replacement is created through an
///           IRBuilder positioned at \p I.
/// \return the newly created internal intrinsic call.
Value *GenXTranslateIntrinsics::translateLscTyped2D(CallInst &I) const {
  auto IID = GenXIntrinsic::getGenXIntrinsicID(&I);
  LLVM_DEBUG(dbgs() << "Translate: " << I << "\n");
  IRBuilder<> Builder(&I);
  Module *M = I.getModule();

  auto *L1Control = cast<Constant>(I.getArgOperand(0));
  auto *L3Control = cast<Constant>(I.getArgOperand(1));
  auto *CacheOpts = translateCacheControls(L1Control, L3Control);

  auto *BTI = I.getArgOperand(2);
  auto *BlockHeight = I.getArgOperand(3);
  auto *BlockWidth = I.getArgOperand(4);
  auto *X = I.getArgOperand(5);
  auto *Y = I.getArgOperand(6);

  // Data operand of a store. It stays null for a load: the internal load
  // intrinsic has no data/passthrough operand, so nothing must be appended.
  Value *StoreData = nullptr;
  auto *Ty = I.getType();

  auto NewIID = vc::InternalIntrinsic::not_internal_intrinsic;

  // NOTE(review): visitCallInst also dispatches
  // genx_lsc_prefetch2d_typed_bti to this function, but no internal 2D typed
  // prefetch intrinsic is handled here, so it ends up in the default branch.
  // Confirm whether prefetch needs its own internal intrinsic.
  switch (IID) {
  default:
    IGC_ASSERT_UNREACHABLE();
  case GenXIntrinsic::genx_lsc_load2d_typed_bti:
    NewIID = vc::InternalIntrinsic::lsc_load_2d_tgm_bti;
    break;
  case GenXIntrinsic::genx_lsc_store2d_typed_bti:
    StoreData = I.getArgOperand(7);
    NewIID = vc::InternalIntrinsic::lsc_store_2d_tgm_bti;
    break;
  }

  // Overloaded types, in declaration order: the result type (load only),
  // the cache-controls vector type, and the stored data type (store only).
  SmallVector<Type *, 2> Types;
  if (!Ty->isVoidTy())
    Types.push_back(Ty);
  Types.push_back(CacheOpts->getType());
  if (StoreData && Ty->isVoidTy())
    Types.push_back(StoreData->getType());

  auto *Func = vc::InternalIntrinsic::getInternalDeclaration(M, NewIID, Types);

  // The internal intrinsic takes the block height before the block width,
  // which is the order lowerLSCTyped2DBlock and the CISA builder table read.
  SmallVector<Value *, 7> Args = {
      CacheOpts, BTI, BlockHeight, BlockWidth, X, Y,
  };
  if (StoreData)
    Args.push_back(StoreData);

  auto *NewI = Builder.CreateCall(Func, Args);
  LLVM_DEBUG(dbgs() << "New intrinsic generated: " << *NewI);

  return NewI;
}

IGC/VectorCompiler/lib/InternalIntrinsics/InternalIntrinsics.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,8 @@ bool InternalIntrinsic::isInternalMemoryIntrinsic(InternalIntrinsic::ID id) {
735735
case InternalIntrinsic::lsc_load_2d_ugm_desc_vnni:
736736
case InternalIntrinsic::lsc_prefetch_2d_ugm_desc:
737737
case InternalIntrinsic::lsc_store_2d_ugm_desc:
738+
case InternalIntrinsic::lsc_load_2d_tgm_bti:
739+
case InternalIntrinsic::lsc_store_2d_tgm_bti:
738740
return true;
739741
}
740742

@@ -901,6 +903,15 @@ InternalIntrinsic::getMemoryRegisterElementSize(const llvm::Instruction *I) {
901903
auto *Ty = LastArg->getType();
902904
return Ty->getScalarType()->getPrimitiveSizeInBits();
903905
} break;
906+
case InternalIntrinsic::lsc_store_2d_tgm_bti: {
907+
auto *LastArg = I->getOperand(6);
908+
auto *Ty = LastArg->getType();
909+
return Ty->getScalarType()->getPrimitiveSizeInBits();
910+
} break;
911+
case InternalIntrinsic::lsc_load_2d_tgm_bti: {
912+
auto *Ty = I->getType();
913+
return Ty->getScalarType()->getPrimitiveSizeInBits();
914+
} break;
904915
}
905916

906917
auto *ElementSize = cast<ConstantInt>(I->getOperand(ElementSizeIndex));
@@ -938,6 +949,9 @@ int InternalIntrinsic::getMemoryCacheControlOperandIndex(unsigned IID) {
938949
case InternalIntrinsic::lsc_prefetch_2d_ugm_desc:
939950
case InternalIntrinsic::lsc_store_2d_ugm_desc:
940951
return 1;
952+
case InternalIntrinsic::lsc_load_2d_tgm_bti:
953+
case InternalIntrinsic::lsc_store_2d_tgm_bti:
954+
return 0;
941955
default:
942956
break;
943957
}

0 commit comments

Comments
 (0)