Skip to content

Commit e3333c9

Browse files
vmustya authored and igcbot committed
Add VC support for Arrow Lake and Lunar Lake platforms
.
1 parent 0fa85c9 commit e3333c9

File tree

29 files changed

+1460
-8
lines changed

29 files changed

+1460
-8
lines changed

IGC/VectorCompiler/cmake/supported_platforms_list.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ set(SUPPORTED_VC_PLATFORMS
1515
"XeHP"
1616
"XeHPG"
1717
"XeLPG"
18+
"XeLPGPlus"
1819
"XeHPC"
1920
"XeHPCVG"
21+
"Xe2"
2022
)

IGC/VectorCompiler/igcdeps/src/TranslationInterface.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,11 @@ getPlatformName(const PLATFORM &Platform) {
170170
return {"XeHPG", RevId};
171171
if (Product == IGFX_METEORLAKE)
172172
return {"XeLPG", RevId};
173+
if (Product == IGFX_ARROWLAKE) {
174+
if (GFX_IS_ARL_S(DevId))
175+
return {"XeLPG", RevId};
176+
return {"XeLPGPlus", RevId};
177+
}
173178
break;
174179
case IGFX_XE_HPC_CORE:
175180
if (Product == IGFX_PVC) {
@@ -178,6 +183,10 @@ getPlatformName(const PLATFORM &Platform) {
178183
return {"XeHPC", RevId & ComputeTileMaskPVC};
179184
}
180185
break;
186+
case IGFX_XE2_LPG_CORE:
187+
if (Product == IGFX_LUNARLAKE)
188+
return {"Xe2", RevId};
189+
break;
181190
default:
182191
break;
183192
}

IGC/VectorCompiler/include/vc/InternalIntrinsics/Intrinsic_definitions.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,63 @@
724724
],
725725
"attributes": "WriteMem", },
726726

727+
## ``llvm.vc.internal.lsc.*.quad.tgm`` : Typed LSC load intrinsic
728+
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
729+
## * arg0: vNi1 Predicate (overloaded)
730+
## * arg1: i8 L1 cache controls [MBC]
731+
## * arg2: i8 L3 cache controls [MBC]
732+
## * arg3: i8 Channel mask [MBC]
733+
## * arg4: i32 BTI
734+
## * arg5: vNi32 U pixel indices (overloaded)
735+
## * arg6: vNi32 V pixel indices
736+
## * arg7: vNi32 R pixel indices
737+
## * arg8: vNi32 LOD pixel indices
738+
## * arg9: vector to take values for masked simd lanes from (load)
739+
## vector to take values to write (store)
740+
##
741+
## * Return value: the value read from memory (load) or void (store, prefetch)
742+
##
743+
"lsc_load_quad_tgm": { "result": "anyvector",
744+
"arguments": [
745+
"anyint", # vNxi1, predicate
746+
"char", # L1 cache control
747+
"char", # L3 cache control
748+
"char", # channel mask
749+
"int", # i32 BTI
750+
"anyint", # vNi32 U pixel index
751+
2, # vNi32 V pixel index
752+
2, # vNi32 R pixel index
753+
2, # vNi32 LOD pixel index
754+
0, # passthru value
755+
],
756+
"attributes": "ReadMem", },
757+
"lsc_store_quad_tgm": { "result": "void",
758+
"arguments": [
759+
"anyint", # vNxi1, predicate
760+
"char", # L1 cache control
761+
"char", # L3 cache control
762+
"char", # channel mask
763+
"int", # i32 BTI
764+
"anyint", # vNi32 U pixel index
765+
1, # vNi32 V pixel index
766+
1, # vNi32 R pixel index
767+
1, # vNi32 LOD pixel index
768+
"anyvector", # data to write
769+
],
770+
"attributes": "WriteMem", },
771+
"lsc_prefetch_quad_tgm": { "result": "void",
772+
"arguments": [
773+
"anyint", # vNxi1, predicate
774+
"char", # L1 cache control
775+
"char", # L3 cache control
776+
"char", # channel mask
777+
"int", # i32 BTI
778+
"anyint", # vNi32 U pixel index
779+
1, # vNi32 V pixel index
780+
1, # vNi32 R pixel index
781+
1, # vNi32 LOD pixel index
782+
],
783+
"attributes": "SideEffects", },
727784

728785
### --------------------
729786
### Thread ID intrinsics

IGC/VectorCompiler/lib/GenXCodeGen/GenX.td

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,14 @@ def FeaturePartialI64Emulation : SubtargetFeature<"lightweight_i64_emulation",
4747
"PartialI64Emulation",
4848
"true",
4949
"emulate subset of 64-bit operations">;
50-
50+
def FeatureNoLegacyDataport : SubtargetFeature<"no_legacy_dataport",
51+
"NoLegacyDataport",
52+
"true",
53+
"true if platform has no legacy dataport">;
5154
def FeatureUseMulDDQ : SubtargetFeature<"mul_ddq",
5255
"UseMulDDQ",
5356
"true",
5457
"use native support for mul [U]Dx[U]D->Q">;
55-
5658
def FeatureLongLongEmulation : SubtargetFeature<"emulate_i64",
5759
"EmulateLongLong",
5860
"true",
@@ -74,6 +76,9 @@ def FeatureSwitchjmp : SubtargetFeature<"switchjmp", "HasSwitchjmp", "true",
7476

7577
def FeaturePreemption : SubtargetFeature<"preemption", "HasPreemption", "true",
7678
"supports preemption">;
79+
def FeatureSystolicDenormControl : SubtargetFeature<
80+
"systolic_denorm_control", "HasSystolicDenormControl", "true",
81+
"supports control for systolic pipeline types denormal values">;
7782

7883
def FeatureWAFusedEUNoMask : SubtargetFeature<
7984
"wa_nomask_fusedEU", "WaNoMaskFusedEU", "true",
@@ -152,7 +157,10 @@ def FeatureThreadPayloadInMemory : SubtargetFeature<"thread_payload_in_memory",
152157
def FeatureHasLSC : SubtargetFeature<"feature_has_lsc",
153158
"HasLSCMessages", "true",
154159
"Target supports LSC messages">;
155-
160+
def FeatureHasLSCOffset : SubtargetFeature<"feature_has_lsc_offset",
161+
"HasLSCOffset", "true",
162+
"Target supports constant offset for LSC messages",
163+
[FeatureHasLSC]>;
156164
def FeatureHasAdd3 : SubtargetFeature<"feature_has_add3",
157165
"HasAdd3", "true",
158166
"Target supports 3-way addition">;
@@ -196,8 +204,6 @@ def FeatureSLM64K : SubtargetFeature<"slm_64k",
196204
def FeatureSLM128K : SubtargetFeature<"slm_128k",
197205
"MaxSLMSize", "128",
198206
"Target supports up to 128k of SLM">;
199-
200-
201207
def FeatureHasSad2 : SubtargetFeature<"feature_has_sad2",
202208
"HasSad2", "true",
203209
"Target supports sad2/sad2a instructions">;
@@ -395,6 +401,27 @@ def : Proc<"XeLPG", [
395401
FeatureThreadPayloadInMemory,
396402
]>;
397403

404+
def : Proc<"XeLPGPlus", [
405+
FeatureFDivFSqrt64Emulation,
406+
FeatureFP64,
407+
FeatureFusedEU,
408+
FeatureHas8ThreadsPerEU,
409+
FeatureHasAdd3,
410+
FeatureHasBfn,
411+
FeatureHasLSC,
412+
FeatureHasLargeGRF,
413+
FeatureHasMadSimd32,
414+
FeatureHasOWordSLM,
415+
FeatureHasPackedFloat,
416+
FeatureIndirectByteGRFCrossing,
417+
FeatureIndirectGRFCrossing,
418+
FeatureInstrBitRotate,
419+
FeatureLongLongEmulation,
420+
FeatureMultiIndirectByteRegioning,
421+
FeatureSLM128K,
422+
FeatureThreadPayloadInMemory,
423+
]>;
424+
398425
def : Proc<"XeHPC", [
399426
FeatureFP64,
400427
FeatureGRFByteSize64,
@@ -447,6 +474,35 @@ def : Proc<"XeHPCVG", [
447474
FeatureThreadPayloadInMemory,
448475
]>;
449476

477+
def : Proc<"Xe2", [
478+
FeatureFP64,
479+
FeatureGRFByteSize64,
480+
FeatureHas8ThreadsPerEU,
481+
FeatureHasAdd3,
482+
FeatureHasBfn,
483+
FeatureHasLSC,
484+
FeatureHasLSCOffset,
485+
FeatureHasLargeGRF,
486+
FeatureHasMadSimd32,
487+
FeatureHasNamedBarriers,
488+
FeatureHasOWordSLM,
489+
FeatureIEEEDivSqrt,
490+
FeatureIndirectGRFCrossing,
491+
FeatureInstr64BitRotate,
492+
FeatureInstrAdd64,
493+
FeatureInstrGlobalAtomicAddF64,
494+
FeatureInstrLocalIntegerCas64,
495+
FeatureLSCMaxWidth32,
496+
FeatureLongLong,
497+
FeatureNoLegacyDataport,
498+
FeaturePartialI64Emulation,
499+
FeaturePreemption,
500+
FeatureSLM128K,
501+
FeatureSwitchjmp,
502+
FeatureSystolicDenormControl,
503+
FeatureThreadPayloadInMemory,
504+
]>;
505+
450506
def GenX : Target {
451507
// Nothing here (yet?)
452508
}

IGC/VectorCompiler/lib/GenXCodeGen/GenXCisaBuilder.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,7 @@ class GenXKernelBuilder {
397397
DoublePrecisionDenorm = 1 << 6,
398398
SinglePrecisionDenorm = 1 << 7,
399399
HalfPrecisionDenorm = 1 << 10,
400+
SystolicDenorm = 1 << 30,
400401
};
401402

402403
uint32_t CRMask = 0;
@@ -1129,6 +1130,8 @@ bool GenXKernelBuilder::run() {
11291130
CRBits::DoublePrecisionDenorm | CRBits::SinglePrecisionDenorm |
11301131
CRBits::HalfPrecisionDenorm;
11311132

1133+
if (Subtarget->hasSystolicDenormControl())
1134+
CRMask |= CRBits::SystolicDenorm;
11321135

11331136
StackCallExecSize =
11341137
getExecSizeFromValue(BackendConfig->getInteropSubgroupSize());
@@ -3763,6 +3766,74 @@ void GenXKernelBuilder::buildIntrinsic(CallInst *CI, unsigned IntrinID,
37633766
UNSIGNED, Mod, true /* Dst */);
37643767
};
37653768

3769+
auto CreateLscTypedLoadQuad =
3770+
[&](VISA_PredOpnd *Pred, VISA_Exec_Size ExecSize,
3771+
VISA_EMask_Ctrl ExecMask, LSC_CACHE_OPTS CacheOpts,
3772+
LSC_DATA_CHMASK ChMask, VISA_VectorOpnd *Surface, VISA_RawOpnd *Dst,
3773+
VISA_RawOpnd *AddrsU, VISA_RawOpnd *AddrsV, VISA_RawOpnd *AddrsR,
3774+
VISA_RawOpnd *AddrsLOD) {
3775+
LLVM_DEBUG(dbgs() << "CreateLscTypedLoadQuad:\n");
3776+
LLVM_DEBUG(CI->dump());
3777+
LLVM_DEBUG(dbgs() << "\n");
3778+
LSC_DATA_SHAPE Shape = {LSC_DATA_SIZE_32b, LSC_DATA_ORDER_NONTRANSPOSE};
3779+
Shape.chmask = ChMask;
3780+
CISA_CALL(Kernel->AppendVISALscTypedLoad(
3781+
LSC_OP::LSC_LOAD_QUAD, Pred, ExecSize, ExecMask, CacheOpts,
3782+
LSC_ADDR_TYPE_BTI, LSC_ADDR_SIZE_32b, Shape, Surface, 0, Dst,
3783+
AddrsU, 0, AddrsV, 0, AddrsR, 0, AddrsLOD));
3784+
};
3785+
auto CreateLscTypedStoreQuad =
3786+
[&](VISA_PredOpnd *Pred, VISA_Exec_Size ExecSize,
3787+
VISA_EMask_Ctrl ExecMask, LSC_CACHE_OPTS CacheOpts,
3788+
LSC_DATA_CHMASK ChMask, VISA_VectorOpnd *Surface,
3789+
VISA_RawOpnd *AddrsU, VISA_RawOpnd *AddrsV, VISA_RawOpnd *AddrsR,
3790+
VISA_RawOpnd *AddrsLOD, VISA_RawOpnd *Data) {
3791+
LLVM_DEBUG(dbgs() << "CreateLscTypedStoreQuad:\n");
3792+
LLVM_DEBUG(CI->dump());
3793+
LLVM_DEBUG(dbgs() << "\n");
3794+
LSC_DATA_SHAPE Shape = {LSC_DATA_SIZE_32b, LSC_DATA_ORDER_NONTRANSPOSE};
3795+
Shape.chmask = ChMask;
3796+
CISA_CALL(Kernel->AppendVISALscTypedStore(
3797+
LSC_OP::LSC_STORE_QUAD, Pred, ExecSize, ExecMask, CacheOpts,
3798+
LSC_ADDR_TYPE_BTI, LSC_ADDR_SIZE_32b, Shape, Surface, 0,
3799+
AddrsU, 0, AddrsV, 0, AddrsR, 0, AddrsLOD, Data));
3800+
};
3801+
3802+
auto CreateLscTyped2D = [&](LSC_OP SubOpcode, LSC_CACHE_OPTS CacheOpts,
3803+
LSC_ADDR_TYPE AddrType, VISA_VectorOpnd *Surface,
3804+
LSC_DATA_SHAPE_TYPED_BLOCK2D DataShape,
3805+
VISA_RawOpnd *Dst, VISA_RawOpnd *Src,
3806+
VISA_VectorOpnd *XOff, VISA_VectorOpnd *YOff) {
3807+
LLVM_DEBUG(dbgs() << "CreateLscTyped2D:\n");
3808+
LLVM_DEBUG(CI->dump());
3809+
LLVM_DEBUG(dbgs() << "\n");
3810+
3811+
// work around VISA spec pecularity: for typed messages width is in bytes
3812+
// not in elements
3813+
VectorType *VT;
3814+
constexpr int SrcOperandNum = 7; // to be in sync with json
3815+
switch (SubOpcode) {
3816+
case LSC_LOAD_BLOCK2D:
3817+
VT = cast<VectorType>(CI->getType());
3818+
break;
3819+
case LSC_STORE_BLOCK2D:
3820+
VT = cast<VectorType>(CI->getArgOperand(SrcOperandNum)->getType());
3821+
break;
3822+
default:
3823+
vc::fatal(getContext(), "GenXCisaBuilder",
3824+
"Unsupported typed 2D operation", CI);
3825+
}
3826+
3827+
auto *ElementType = VT->getElementType();
3828+
unsigned EltSize = DL.getTypeSizeInBits(ElementType) / genx::ByteBits;
3829+
3830+
LLVM_DEBUG(dbgs() << "Multiplying by: " << EltSize << "\n");
3831+
DataShape.width *= EltSize;
3832+
3833+
CISA_CALL(Kernel->AppendVISALscTypedBlock2DInst(
3834+
SubOpcode, CacheOpts, AddrType, DataShape, Surface, 0, Dst, XOff, YOff,
3835+
0, 0, Src));
3836+
};
37663837

37673838
auto CheckLscOp = [&](LSC_SFID LscSfid, LSC_ADDR_TYPE AddressType,
37683839
LSC_ADDR_SIZE AddressSize, LSC_DATA_SIZE ElementSize) {

IGC/VectorCompiler/lib/GenXCodeGen/GenXLegacyToLscTranslator.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class GenXLegacyToLscTranslator
5050
Value *translateSVMGatherScatter(CallInst &CI) const;
5151
Value *translateQuadGatherScatter(CallInst &CI) const;
5252
Value *translateAtomic(CallInst &CI) const;
53+
Value *translateMediaLoadStore(CallInst &CI) const;
5354

5455
const GenXSubtarget *ST = nullptr;
5556
};
@@ -212,6 +213,10 @@ void GenXLegacyToLscTranslator::visitCallInst(CallInst &CI) {
212213
case GenXIntrinsic::genx_svm_block_st:
213214
NewCI = translateOWordLoadStore(CI);
214215
break;
216+
case GenXIntrinsic::genx_media_ld:
217+
case GenXIntrinsic::genx_media_st:
218+
NewCI = translateMediaLoadStore(CI);
219+
break;
215220
}
216221

217222
if (!NewCI) {
@@ -791,6 +796,75 @@ Value *GenXLegacyToLscTranslator::translateAtomic(CallInst &CI) const {
791796
return I;
792797
}
793798

799+
Value *GenXLegacyToLscTranslator::translateMediaLoadStore(CallInst &CI) const {
800+
LLVM_DEBUG(dbgs() << "Translate intrinsic: " << CI);
801+
IRBuilder<> Builder(&CI);
802+
auto IID = vc::getAnyIntrinsicID(&CI);
803+
804+
IGC_ASSERT(IID == GenXIntrinsic::genx_media_ld ||
805+
IID == GenXIntrinsic::genx_media_st);
806+
auto IsLoad = IID == GenXIntrinsic::genx_media_ld;
807+
auto NewIID = IsLoad ? GenXIntrinsic::genx_lsc_load2d_typed_bti
808+
: GenXIntrinsic::genx_lsc_store2d_typed_bti;
809+
810+
auto *Modifier = cast<ConstantInt>(CI.getArgOperand(0));
811+
auto *BTI = CI.getArgOperand(1);
812+
auto *Plane = cast<ConstantInt>(CI.getArgOperand(2));
813+
auto *BlockWidth = cast<ConstantInt>(CI.getArgOperand(3));
814+
auto *AddrX = CI.getArgOperand(4);
815+
auto *AddrY = CI.getArgOperand(5);
816+
Value *Data = nullptr;
817+
IGCLLVM::FixedVectorType *VTy = nullptr;
818+
819+
if (IsLoad) {
820+
VTy = cast<IGCLLVM::FixedVectorType>(CI.getType());
821+
} else {
822+
Data = CI.getArgOperand(6);
823+
VTy = cast<IGCLLVM::FixedVectorType>(Data->getType());
824+
}
825+
826+
if (Modifier->getZExtValue() != 0) {
827+
LLVM_DEBUG(dbgs() << "Modifiers are not supported for media block "
828+
"intrinsic translations: "
829+
<< CI);
830+
return nullptr;
831+
}
832+
if (Plane->getZExtValue() != 0) {
833+
LLVM_DEBUG(dbgs() << "Non-zero plane is not supported for media block "
834+
"intrinsic translations: "
835+
<< CI);
836+
return nullptr;
837+
}
838+
839+
auto *ETy = VTy->getElementType();
840+
unsigned ESize = ETy->getScalarSizeInBits() / ByteBits;
841+
auto DataSize = ESize * VTy->getNumElements();
842+
843+
unsigned Width = BlockWidth->getZExtValue();
844+
unsigned RoundedWidth = roundedVal(Width, 4u);
845+
unsigned Height = DataSize / RoundedWidth;
846+
IGC_ASSERT(Width > 0 && Width <= 64);
847+
IGC_ASSERT(Width % ESize == 0);
848+
IGC_ASSERT(DataSize % RoundedWidth == 0);
849+
850+
SmallVector<Value *, 8> Args = {
851+
Builder.getInt8(0), // L1 cache control (default)
852+
Builder.getInt8(0), // L3 cache control (default)
853+
BTI,
854+
Builder.getInt32(Height),
855+
Builder.getInt32(Width / ESize),
856+
AddrX,
857+
AddrY,
858+
};
859+
if (!IsLoad)
860+
Args.push_back(Data);
861+
862+
auto *Func = GenXIntrinsic::getGenXDeclaration(CI.getModule(), NewIID, {VTy});
863+
auto *I = Builder.CreateCall(Func, Args);
864+
LLVM_DEBUG(dbgs() << "New intrinsic generated: " << *I);
865+
return I;
866+
}
867+
794868
bool GenXLegacyToLscTranslator::isLocal(Value *BTI) const {
795869
if (auto *C = dyn_cast<ConstantInt>(BTI))
796870
return C->getZExtValue() == visa::ReservedSurfaceIndex::RSI_Slm;

0 commit comments

Comments
 (0)