Skip to content

Commit 09a2f59

Browse files
vsemenov368 authored and igcbot committed
Enable internal LSC cmask typed intrinsics
.
1 parent f6dec22 commit 09a2f59

File tree

8 files changed

+195
-128
lines changed

8 files changed

+195
-128
lines changed

IGC/VectorCompiler/include/vc/InternalIntrinsics/Intrinsic_definitions.py

Lines changed: 25 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -877,58 +877,54 @@
877877
## ``llvm.vc.internal.lsc.*.quad.tgm`` : Typed LSC load intrinsic
878878
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
879879
## * arg0: vNi1 Predicate (overloaded)
880-
## * arg1: i8 L1 cache controls [MBC]
881-
## * arg2: i8 L3 cache controls [MBC]
882-
## * arg3: i8 Channel mask [MBC]
883-
## * arg4: i32 BTI
884-
## * arg5: vNi32 U pixel indices (overloaded)
885-
## * arg6: vNi32 V pixel indices
886-
## * arg7: vNi32 R pixel indices
887-
## * arg8: vNi32 LOD pixel indices
888-
## * arg9: vector to take values for masked simd lanes from (load)
880+
## * arg1: vNi8, Cache controls, where N is the number of supported cache levels [MBC]
881+
## * arg2: i8 Channel mask [MBC]
882+
## * arg3: i32 BTI
883+
## * arg4: vNi32 U pixel indices (overloaded)
884+
## * arg5: vNi32 V pixel indices
885+
## * arg6: vNi32 R pixel indices
886+
## * arg7: vNi32 LOD pixel indices
887+
## * arg8: vector to take values for masked simd lanes from (load)
889888
## vector to take values to write (store)
890889
##
891890
## * Return value: the value read from memory (load) or void (store, prefetch)
892891
##
893892
"lsc_load_quad_tgm": { "result": "anyvector",
894893
"arguments": [
895-
"anyint", # vNxi1, predicate
896-
"char", # L1 cache control
897-
"char", # L3 cache control
898-
"char", # channel mask
899-
"int", # i32 BTI
900-
"anyint", # vNi32 U pixel index
901-
2, # vNi32 V pixel index
902-
2, # vNi32 R pixel index
903-
2, # vNi32 LOD pixel index
904-
0, # passthru value
894+
"anyint", # vNxi1, predicate
895+
"anyvector", # cache controls
896+
"char", # channel mask
897+
"int", # i32 BTI
898+
"anyint", # vNi32 U pixel index
899+
3, # vNi32 V pixel index
900+
3, # vNi32 R pixel index
901+
3, # vNi32 LOD pixel index
902+
0, # passthru value
905903
],
906904
"attributes": "ReadMem", },
907905
"lsc_store_quad_tgm": { "result": "void",
908906
"arguments": [
909907
"anyint", # vNxi1, predicate
910-
"char", # L1 cache control
911-
"char", # L3 cache control
908+
"anyvector", # cache controls
912909
"char", # channel mask
913910
"int", # i32 BTI
914911
"anyint", # vNi32 U pixel index
915-
1, # vNi32 V pixel index
916-
1, # vNi32 R pixel index
917-
1, # vNi32 LOD pixel index
912+
2, # vNi32 V pixel index
913+
2, # vNi32 R pixel index
914+
2, # vNi32 LOD pixel index
918915
"anyvector", # data to write
919916
],
920917
"attributes": "WriteMem", },
921918
"lsc_prefetch_quad_tgm": { "result": "void",
922919
"arguments": [
923920
"anyint", # vNxi1, predicate
924-
"char", # L1 cache control
925-
"char", # L3 cache control
921+
"anyvector", # cache controls
926922
"char", # channel mask
927923
"int", # i32 BTI
928924
"anyint", # vNi32 U pixel index
929-
1, # vNi32 V pixel index
930-
1, # vNi32 R pixel index
931-
1, # vNi32 LOD pixel index
925+
2, # vNi32 V pixel index
926+
2, # vNi32 R pixel index
927+
2, # vNi32 LOD pixel index
932928
],
933929
"attributes": "SideEffects", },
934930

IGC/VectorCompiler/lib/GenXCodeGen/Utils/cisa_gen_intrinsics.json

Lines changed: 25 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -653,49 +653,46 @@
653653
"gen_opc": "ISA_LSC_TYPED_LOAD",
654654
"exec_size": ["EXECSIZE_FROM_ARG", 1],
655655
"pred": ["PREDICATION", 1],
656-
"L1_Hints": ["BYTE", 2],
657-
"L3_Hints": ["BYTE", 3],
658-
"ChMask": ["BYTE", 4],
656+
"CacheOpts": ["CACHEOPTS", 2],
657+
"ChMask": ["BYTE", 3],
659658
"AddrType": ["LITERAL", "LSC_ADDR_TYPE_BTI"],
660-
"Surface": ["GENERAL", "UNSIGNED", 5],
659+
"Surface": ["GENERAL", "UNSIGNED", 4],
661660
"Dst": ["RAW", 0],
662-
"AddrsU": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 6],
663-
"AddrsV": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 7],
664-
"AddrsR": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 8],
665-
"AddrsLOD": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 9],
666-
"twoaddr": ["TWOADDR", 10]
661+
"AddrsU": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 5],
662+
"AddrsV": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 6],
663+
"AddrsR": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 7],
664+
"AddrsLOD": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 8],
665+
"twoaddr": ["TWOADDR", 9]
667666
},
668667
"vc::InternalIntrinsic::lsc_store_quad_tgm" : {
669668
"opc": "ISA_LSC_TYPED",
670669
"gen_opc": "ISA_LSC_TYPED_STORE",
671670
"exec_size": ["EXECSIZE_FROM_ARG", 1],
672671
"pred": ["PREDICATION", 1],
673-
"L1_Hints": ["BYTE", 2],
674-
"L3_Hints": ["BYTE", 3],
675-
"ChMask": ["BYTE", 4],
672+
"CacheOpts": ["CACHEOPTS", 2],
673+
"ChMask": ["BYTE", 3],
676674
"AddrType": ["LITERAL", "LSC_ADDR_TYPE_BTI"],
677-
"Surface": ["GENERAL", "UNSIGNED", 5],
678-
"AddrsU": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 6],
679-
"AddrsV": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 7],
680-
"AddrsR": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 8],
681-
"AddrsLOD": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 9],
682-
"Src": ["RAW", 10]
675+
"Surface": ["GENERAL", "UNSIGNED", 4],
676+
"AddrsU": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 5],
677+
"AddrsV": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 6],
678+
"AddrsR": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 7],
679+
"AddrsLOD": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 8],
680+
"Src": ["RAW", 9]
683681
},
684682
"vc::InternalIntrinsic::lsc_prefetch_quad_tgm" : {
685683
"opc": "ISA_LSC_TYPED",
686684
"gen_opc": "ISA_LSC_TYPED_LOAD",
687685
"exec_size": ["EXECSIZE_FROM_ARG", 1],
688686
"pred": ["PREDICATION", 1],
689-
"L1_Hints": ["BYTE", 2],
690-
"L3_Hints": ["BYTE", 3],
691-
"ChMask": ["BYTE", 4],
687+
"CacheOpts": ["CACHEOPTS", 2],
688+
"ChMask": ["BYTE", 3],
692689
"AddrType": ["LITERAL", "LSC_ADDR_TYPE_BTI"],
693-
"Surface": ["GENERAL", "UNSIGNED", 5],
690+
"Surface": ["GENERAL", "UNSIGNED", 4],
694691
"Dst": ["NULLRAW"],
695-
"AddrsU": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 6],
696-
"AddrsV": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 7],
697-
"AddrsR": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 8],
698-
"AddrsLOD": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 9]
692+
"AddrsU": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 5],
693+
"AddrsV": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 6],
694+
"AddrsR": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 7],
695+
"AddrsLOD": ["RAW", "UNSIGNED", "RAW_NULLALLOWED", 8]
699696
},
700697
"vc::InternalIntrinsic::lsc_load_block_2d_ugm" : {
701698
"opc": "ISA_LSC_UNTYPED",
@@ -3957,12 +3954,11 @@
39573954
]
39583955
],
39593956
"ISA_LSC_TYPED_LOAD": [
3960-
"LSC_CACHE_OPTS cache {(LSC_CACHE_OPT) L1_Hints, (LSC_CACHE_OPT) L3_Hints}",
39613957
[ "CreateLscTypedLoadQuad",
39623958
"pred",
39633959
"exec_size",
39643960
"exec_mask",
3965-
"cache",
3961+
"CacheOpts",
39663962
"LSC_DATA_CHMASK(ChMask)",
39673963
"LSC_ADDR_TYPE(AddrType)",
39683964
"Surface",
@@ -3974,12 +3970,11 @@
39743970
]
39753971
],
39763972
"ISA_LSC_TYPED_STORE": [
3977-
"LSC_CACHE_OPTS cache {(LSC_CACHE_OPT) L1_Hints, (LSC_CACHE_OPT) L3_Hints}",
39783973
[ "CreateLscTypedStoreQuad",
39793974
"pred",
39803975
"exec_size",
39813976
"exec_mask",
3982-
"cache",
3977+
"CacheOpts",
39833978
"LSC_DATA_CHMASK(ChMask)",
39843979
"LSC_ADDR_TYPE(AddrType)",
39853980
"Surface",

IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXTranslateIntrinsics.cpp

Lines changed: 42 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -688,24 +688,64 @@ Value *GenXTranslateIntrinsics::translateLscTyped(CallInst &I) const {
688688
IRBuilder<> Builder(&I);
689689
Module *M = I.getModule();
690690

691+
auto *Pred = I.getArgOperand(0);
692+
693+
auto *L1Control = cast<Constant>(I.getArgOperand(1));
694+
auto *L3Control = cast<Constant>(I.getArgOperand(2));
695+
auto *CacheOpts = translateCacheControls(L1Control, L3Control);
696+
697+
auto *ChannelMask = I.getArgOperand(3);
698+
auto *BTI = I.getArgOperand(4);
699+
auto *U = I.getArgOperand(5);
700+
auto *V = I.getArgOperand(6);
701+
auto *R = I.getArgOperand(7);
702+
auto *LOD = I.getArgOperand(8);
703+
704+
Value *Src = nullptr;
705+
auto *Ty = I.getType();
706+
691707
auto NewIID = vc::InternalIntrinsic::not_internal_intrinsic;
692708

693709
switch (IID) {
694710
default:
695711
IGC_ASSERT_UNREACHABLE();
696712
case GenXIntrinsic::genx_lsc_load_merge_quad_typed_bti:
713+
Src = I.getArgOperand(9);
697714
NewIID = vc::InternalIntrinsic::lsc_load_quad_tgm;
698715
break;
699716
case GenXIntrinsic::genx_lsc_prefetch_quad_typed_bti:
700717
NewIID = vc::InternalIntrinsic::lsc_prefetch_quad_tgm;
701718
break;
702719
case GenXIntrinsic::genx_lsc_store_quad_typed_bti:
720+
Src = I.getArgOperand(9);
703721
NewIID = vc::InternalIntrinsic::lsc_store_quad_tgm;
704722
break;
705723
}
706724

707-
SmallVector<Value *, 10> Args(I.args());
708-
auto *Func = vc::getAnyDeclarationForArgs(M, NewIID, I.getType(), Args);
725+
SmallVector<Type *, 4> Types;
726+
if (!Ty->isVoidTy())
727+
Types.push_back(Ty);
728+
Types.push_back(Pred->getType());
729+
Types.push_back(CacheOpts->getType());
730+
Types.push_back(U->getType());
731+
if (Src && Ty->isVoidTy())
732+
Types.push_back(Src->getType());
733+
734+
auto *Func = vc::InternalIntrinsic::getInternalDeclaration(M, NewIID, Types);
735+
736+
SmallVector<Value *, 9> Args = {
737+
Pred,
738+
CacheOpts,
739+
ChannelMask,
740+
BTI,
741+
U,
742+
V,
743+
R,
744+
LOD,
745+
};
746+
if (Src)
747+
Args.push_back(Src);
748+
709749
auto *NewI = Builder.CreateCall(Func, Args);
710750
LLVM_DEBUG(dbgs() << "New intrinsic generated: " << *NewI);
711751

IGC/VectorCompiler/lib/InternalIntrinsics/InternalIntrinsics.cpp

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -831,9 +831,6 @@ InternalIntrinsic::getMemoryVectorSizePerLane(const llvm::Instruction *I) {
831831

832832
IGC_ASSERT_UNREACHABLE();
833833
}
834-
case InternalIntrinsic::lsc_load_quad_tgm:
835-
case InternalIntrinsic::lsc_prefetch_quad_tgm:
836-
case InternalIntrinsic::lsc_store_quad_tgm:
837834
case InternalIntrinsic::lsc_load_quad_bti:
838835
case InternalIntrinsic::lsc_load_quad_bss:
839836
case InternalIntrinsic::lsc_load_quad_slm:
@@ -851,6 +848,16 @@ InternalIntrinsic::getMemoryVectorSizePerLane(const llvm::Instruction *I) {
851848
IGC_ASSERT(Size > 0 && Size <= 4);
852849
return Size;
853850
}
851+
case InternalIntrinsic::lsc_load_quad_tgm:
852+
case InternalIntrinsic::lsc_prefetch_quad_tgm:
853+
case InternalIntrinsic::lsc_store_quad_tgm: {
854+
auto *ChannelMask = cast<ConstantInt>(I->getOperand(2));
855+
auto Mask = ChannelMask->getZExtValue();
856+
auto Size = countPopulation(Mask);
857+
IGC_ASSERT(Size > 0 && Size <= 4);
858+
return Size;
859+
}
860+
854861
}
855862

856863
return 1;
@@ -948,6 +955,9 @@ int InternalIntrinsic::getMemoryCacheControlOperandIndex(unsigned IID) {
948955
case InternalIntrinsic::lsc_load_2d_ugm_desc_vnni:
949956
case InternalIntrinsic::lsc_prefetch_2d_ugm_desc:
950957
case InternalIntrinsic::lsc_store_2d_ugm_desc:
958+
case InternalIntrinsic::lsc_load_quad_tgm:
959+
case InternalIntrinsic::lsc_store_quad_tgm:
960+
case InternalIntrinsic::lsc_prefetch_quad_tgm:
951961
return 1;
952962
case InternalIntrinsic::lsc_load_2d_tgm_bti:
953963
case InternalIntrinsic::lsc_store_2d_tgm_bti:

IGC/VectorCompiler/test/CisaBuilder/lsc_indirect.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
;=========================== begin_copyright_notice ============================
22
;
3-
; Copyright (C) 2023 Intel Corporation
3+
; Copyright (C) 2023-2024 Intel Corporation
44
;
55
; SPDX-License-Identifier: MIT
66
;
@@ -13,7 +13,7 @@ declare i32 @llvm.genx.rdregioni.i32.v2i32.i16(<2 x i32>, i32, i32, i32, i16, i3
1313
declare !genx_intrinsic_id !28 <64 x i8> @llvm.vc.internal.lsc.load.2d.tgm.bti.v64i8.v2i8(<2 x i8>, i32, i32, i32, i32, i32) #2
1414
declare !genx_intrinsic_id !29 void @llvm.vc.internal.lsc.store.2d.tgm.bti.v2i8.v64i8(<2 x i8>, i32, i32, i32, i32, i32, <64 x i8>) #3
1515

16-
declare void @llvm.vc.internal.lsc.store.quad.tgm.v4i1.v4i32.v4i32(<4 x i1>, i8, i8, i8, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) #3
16+
declare void @llvm.vc.internal.lsc.store.quad.tgm.v4i1.v2i8.v4i32.v4i32(<4 x i1>, <2 x i8>, i8, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) #3
1717

1818
define dllexport spir_kernel void @test_genx(<2 x i32> %base, <2 x i32> %x, <2 x i32> %y, i16 %offset) local_unnamed_addr #0 {
1919
%offset.new = add i16 %offset, 4
@@ -32,7 +32,7 @@ define spir_kernel void @test_internal(<4 x i32> %x, <2 x i32> %base, i16 %offse
3232
%offset.new = add i16 %offset, 4
3333
%base.new = tail call i32 @llvm.genx.rdregioni.i32.v2i32.i16(<2 x i32> %base, i32 0, i32 1, i32 1, i16 %offset.new, i32 0)
3434
; CHECK: lsc_store_quad.tgm (M1, 4) bti(V{{[0-9]+}})
35-
call void @llvm.vc.internal.lsc.store.quad.tgm.v4i1.v4i32.v4i32(<4 x i1> <i1 true, i1 true, i1 true, i1 true>, i8 0, i8 0, i8 1, i32 %base.new, <4 x i32> %x, <4 x i32> %x, <4 x i32> %x, <4 x i32> %x, <4 x i32> %x)
35+
call void @llvm.vc.internal.lsc.store.quad.tgm.v4i1.v2i8.v4i32.v4i32(<4 x i1> <i1 true, i1 true, i1 true, i1 true>, <2 x i8> zeroinitializer, i8 1, i32 %base.new, <4 x i32> %x, <4 x i32> %x, <4 x i32> %x, <4 x i32> %x, <4 x i32> %x)
3636
ret void
3737
}
3838

0 commit comments

Comments
 (0)