Skip to content

Commit f31c646

Browse files
vmustyaigcbot
authored and committed
Replace VC math intrinsics with standard LLVM ones
Use standard LLVM intrinsics as follows:
* `@llvm.genx.cos` -> `@llvm.cos`
* `@llvm.genx.exp` -> `@llvm.exp2`
* `@llvm.genx.log` -> `@llvm.log2`
* `@llvm.genx.sin` -> `@llvm.sin`
* `@llvm.genx.pow` -> `@llvm.pow`
1 parent cbbfbdc commit f31c646

File tree

9 files changed

+284
-108
lines changed

9 files changed

+284
-108
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXLowering.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2028,16 +2028,6 @@ bool GenXLowering::processInst(Instruction *Inst) {
20282028
return lowerMathIntrinsic(CI, GenXIntrinsic::genx_rndd);
20292029
case Intrinsic::trunc:
20302030
return lowerMathIntrinsic(CI, GenXIntrinsic::genx_rndz);
2031-
case Intrinsic::exp2:
2032-
return lowerFastMathIntrinsic(CI, GenXIntrinsic::genx_exp);
2033-
case Intrinsic::log2:
2034-
return lowerFastMathIntrinsic(CI, GenXIntrinsic::genx_log);
2035-
case Intrinsic::pow:
2036-
return lowerFastMathIntrinsic(CI, GenXIntrinsic::genx_pow);
2037-
case Intrinsic::sin:
2038-
return lowerFastMathIntrinsic(CI, GenXIntrinsic::genx_sin);
2039-
case Intrinsic::cos:
2040-
return lowerFastMathIntrinsic(CI, GenXIntrinsic::genx_cos);
20412031
case Intrinsic::stacksave:
20422032
return lowerStackSave(CI);
20432033
case Intrinsic::stackrestore:

IGC/VectorCompiler/lib/GenXCodeGen/Utils/cisa_gen_intrinsics.json

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,47 @@
2121
"src1": [ "GENERAL", "MODIFIER_ARITH", 2 ],
2222
"src2": [ "GENERAL", "MODIFIER_ARITH", 3 ]
2323
},
24+
"cos": {
25+
"opc": "ISA_COS",
26+
"exec_size": [ "EXECSIZE" ],
27+
"elementwise": [ "ELEMENTWISE" ],
28+
"pred": [ "IMPLICITPRED" ],
29+
"dst": [ "GENERAL", 0 ],
30+
"src0": [ "GENERAL", 1 ]
31+
},
32+
"exp2": {
33+
"opc": "ISA_EXP",
34+
"exec_size": [ "EXECSIZE" ],
35+
"elementwise": [ "ELEMENTWISE" ],
36+
"pred": [ "IMPLICITPRED" ],
37+
"dst": [ "GENERAL", 0 ],
38+
"src0": [ "GENERAL", 1 ]
39+
},
40+
"log2": {
41+
"opc": "ISA_LOG",
42+
"exec_size": [ "EXECSIZE" ],
43+
"elementwise": [ "ELEMENTWISE" ],
44+
"pred": [ "IMPLICITPRED" ],
45+
"dst": [ "GENERAL", 0 ],
46+
"src0": [ "GENERAL", 1 ]
47+
},
48+
"pow": {
49+
"opc": "ISA_POW",
50+
"exec_size": [ "EXECSIZE" ],
51+
"elementwise": [ "ELEMENTWISE" ],
52+
"pred": [ "IMPLICITPRED" ],
53+
"dst": [ "GENERAL", 0 ],
54+
"src0": [ "GENERAL", 1 ],
55+
"src1": [ "GENERAL", 2 ]
56+
},
57+
"sin": {
58+
"opc": "ISA_SIN",
59+
"exec_size": [ "EXECSIZE" ],
60+
"elementwise": [ "ELEMENTWISE" ],
61+
"pred": [ "IMPLICITPRED" ],
62+
"dst": [ "GENERAL", 0 ],
63+
"src0": [ "GENERAL", 1 ]
64+
},
2465
// Internal vc intrinsics
2566
"vc::InternalIntrinsic::cast_from_bf16": {
2667
"opc": "ISA_MOV",
@@ -1645,15 +1686,6 @@
16451686
"src0": [ "GENERAL", "MODIFIER_ARITH", 1 ],
16461687
"src1": [ "GENERAL", "MODIFIER_ARITH", 2 ]
16471688
},
1648-
"genx_pow": {
1649-
"opc": "ISA_POW",
1650-
"exec_size": [ "EXECSIZE" ],
1651-
"elementwise": [ "ELEMENTWISE" ],
1652-
"pred": [ "IMPLICITPRED" ],
1653-
"dst": [ "GENERAL", 0 ],
1654-
"src0": [ "GENERAL", 1 ],
1655-
"src1": [ "GENERAL", 2 ]
1656-
},
16571689
"genx_add_addr": {
16581690
"opc": "ISA_ADDR_ADD",
16591691
"exec_size": [ "EXECSIZE" ],
@@ -1706,22 +1738,6 @@
17061738
"dst": [ "GENERAL", 0 ],
17071739
"src0": [ "GENERAL", 1 ]
17081740
},
1709-
"genx_log": {
1710-
"opc": "ISA_LOG",
1711-
"exec_size": [ "EXECSIZE" ],
1712-
"elementwise": [ "ELEMENTWISE" ],
1713-
"pred": [ "IMPLICITPRED" ],
1714-
"dst": [ "GENERAL", 0 ],
1715-
"src0": [ "GENERAL", 1 ]
1716-
},
1717-
"genx_exp": {
1718-
"opc": "ISA_EXP",
1719-
"exec_size": [ "EXECSIZE" ],
1720-
"elementwise": [ "ELEMENTWISE" ],
1721-
"pred": [ "IMPLICITPRED" ],
1722-
"dst": [ "GENERAL", 0 ],
1723-
"src0": [ "GENERAL", 1 ]
1724-
},
17251741
"genx_scatter_scaled": {
17261742
"opc": "ISA_SCATTER_SCALED",
17271743
"exec_size": [ "EXECSIZE_FROM_ARG", 1 ],
@@ -2260,22 +2276,6 @@
22602276
"deltaV": [ "GENERAL", 7 ],
22612277
"dst": [ "RAW", 0 ]
22622278
},
2263-
"genx_sin": {
2264-
"opc": "ISA_SIN",
2265-
"exec_size": [ "EXECSIZE" ],
2266-
"elementwise": [ "ELEMENTWISE" ],
2267-
"pred": [ "IMPLICITPRED" ],
2268-
"dst": [ "GENERAL", 0 ],
2269-
"src0": [ "GENERAL", 1 ]
2270-
},
2271-
"genx_cos": {
2272-
"opc": "ISA_COS",
2273-
"exec_size": [ "EXECSIZE" ],
2274-
"elementwise": [ "ELEMENTWISE" ],
2275-
"pred": [ "IMPLICITPRED" ],
2276-
"dst": [ "GENERAL", 0 ],
2277-
"src0": [ "GENERAL", 1 ]
2278-
},
22792279
"genx_ssavg": {
22802280
"opc": "ISA_AVG",
22812281
"exec_size": [ "EXECSIZE" ],

IGC/VectorCompiler/lib/GenXOpts/CMPacketize/GenXPacketize.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -825,19 +825,20 @@ Value *GenXPacketize::packetizeLLVMIntrinsic(Instruction *pInst) {
825825
}
826826

827827
// override certain intrinsics
828-
Value *pNewCall;
829828
switch (id) {
829+
default:
830+
break;
830831
case Intrinsic::log2:
831-
pNewCall = B->VLOG2PS(packetizedArgs[0]);
832+
if (!pInst->hasApproxFunc())
833+
return B->VLOG2PS(packetizedArgs[0]);
832834
break;
833835
case Intrinsic::exp2:
834-
pNewCall = B->VEXP2PS(packetizedArgs[0]);
836+
if (!pInst->hasApproxFunc())
837+
return B->VEXP2PS(packetizedArgs[0]);
835838
break;
836-
default: {
837-
Function *newF = getVectorIntrinsic(M, id, vectorArgTys);
838-
pNewCall = CallInst::Create(newF, packetizedArgs, "", pCall);
839-
}
840839
}
840+
Function *newF = getVectorIntrinsic(M, id, vectorArgTys);
841+
auto *pNewCall = CallInst::Create(newF, packetizedArgs, "", pCall);
841842
return pNewCall;
842843
}
843844

IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXImportOCLBiF.cpp

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -87,21 +87,21 @@ BIConvert::BIConvert() {
8787
OneMap["__builtin_IB_frnd_pi"] = GenXIntrinsic::genx_rndu;
8888
OneMap["__builtin_IB_frnd_ni"] = GenXIntrinsic::genx_rndd;
8989
OneMap["__builtin_IB_frnd_zi"] = GenXIntrinsic::genx_rndz;
90-
OneMap["__builtin_IB_native_cosf"] = GenXIntrinsic::genx_cos;
91-
OneMap["__builtin_IB_native_cosh"] = GenXIntrinsic::genx_cos;
92-
OneMap["__builtin_IB_native_sinf"] = GenXIntrinsic::genx_sin;
93-
OneMap["__builtin_IB_native_sinh"] = GenXIntrinsic::genx_sin;
94-
OneMap["__builtin_IB_native_exp2f"] = GenXIntrinsic::genx_exp;
95-
OneMap["__builtin_IB_native_exp2h"] = GenXIntrinsic::genx_exp;
96-
OneMap["__builtin_IB_native_log2f"] = GenXIntrinsic::genx_log;
97-
OneMap["__builtin_IB_native_log2h"] = GenXIntrinsic::genx_log;
98-
OneMap["__builtin_IB_native_sqrtf"] = GenXIntrinsic::genx_sqrt;
99-
OneMap["__builtin_IB_native_sqrth"] = GenXIntrinsic::genx_sqrt;
100-
OneMap["__builtin_IB_native_sqrtd"] = GenXIntrinsic::genx_sqrt;
90+
OneMap["__builtin_IB_native_cosf"] = Intrinsic::cos;
91+
OneMap["__builtin_IB_native_cosh"] = Intrinsic::cos;
92+
OneMap["__builtin_IB_native_sinf"] = Intrinsic::sin;
93+
OneMap["__builtin_IB_native_sinh"] = Intrinsic::sin;
94+
OneMap["__builtin_IB_native_exp2f"] = Intrinsic::exp2;
95+
OneMap["__builtin_IB_native_exp2h"] = Intrinsic::exp2;
96+
OneMap["__builtin_IB_native_log2f"] = Intrinsic::log2;
97+
OneMap["__builtin_IB_native_log2h"] = Intrinsic::log2;
98+
OneMap["__builtin_IB_native_sqrtf"] = Intrinsic::sqrt;
99+
OneMap["__builtin_IB_native_sqrth"] = Intrinsic::sqrt;
100+
OneMap["__builtin_IB_native_sqrtd"] = Intrinsic::sqrt;
101101
OneMap["__builtin_IB_popcount_1u32"] = GenXIntrinsic::genx_cbit;
102102
OneMap["__builtin_IB_popcount_1u16"] = GenXIntrinsic::genx_cbit;
103103
OneMap["__builtin_IB_popcount_1u8"] = GenXIntrinsic::genx_cbit;
104-
OneMap["__builtin_IB_native_powrf"] = GenXIntrinsic::genx_pow;
104+
OneMap["__builtin_IB_native_powrf"] = Intrinsic::pow;
105105
OneMap["__builtin_IB_fma"] = Intrinsic::fma;
106106
OneMap["__builtin_IB_fmah"] = Intrinsic::fma;
107107
OneMap["__builtin_IB_bfrev"] = GenXIntrinsic::genx_bfrev;
@@ -185,7 +185,6 @@ static Function *getOneMapIntrinsicDeclaration(CallInst &CI, const unsigned IID,
185185
return vc::getGenXDeclarationForIdFromArgs(
186186
CI.getType(), CI.args(), static_cast<GenXIntrinsic::ID>(IID), M);
187187

188-
IGC_ASSERT_MESSAGE(IID == Intrinsic::fma, "Expected fma intrinsic");
189188
return Intrinsic::getDeclaration(&M, static_cast<Intrinsic::ID>(IID),
190189
{CI.getType()});
191190
}
@@ -237,6 +236,8 @@ void BIConvert::runOnModule(Module &M) {
237236
const SmallVector<llvm::Value *, 3> Args{InstCall->args()};
238237
Instruction *const IntrinCall =
239238
CallInst::Create(IntrinFunc, Args, InstCall->getName(), InstCall);
239+
if (!GenXIntrinsic::isGenXIntrinsic(IID) && IID != Intrinsic::fma)
240+
IntrinCall->setHasApproxFunc(true);
240241
IntrinCall->setDebugLoc(InstCall->getDebugLoc());
241242
InstCall->replaceAllUsesWith(IntrinCall);
242243
ListDelete.push_back(InstCall);

IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXTranslateIntrinsics.cpp

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ class GenXTranslateIntrinsics final
4848

4949
private:
5050
Constant *translateCacheControls(Constant *L1, Constant *L3) const;
51-
51+
Value *translateMath(CallInst &I, Intrinsic::ID IID,
52+
bool HasApproxFunc = true) const;
5253
Value *translateBFloat16Convert(CallInst &I) const;
5354
Value *translateTFloat32Convert(CallInst &I) const;
5455
Value *translateStochasticRounding(CallInst &I) const;
@@ -84,6 +85,21 @@ void GenXTranslateIntrinsics::visitCallInst(CallInst &I) const {
8485
switch (IID) {
8586
default:
8687
return;
88+
case GenXIntrinsic::genx_cos:
89+
NewI = translateMath(I, Intrinsic::cos);
90+
break;
91+
case GenXIntrinsic::genx_exp:
92+
NewI = translateMath(I, Intrinsic::exp2);
93+
break;
94+
case GenXIntrinsic::genx_log:
95+
NewI = translateMath(I, Intrinsic::log2);
96+
break;
97+
case GenXIntrinsic::genx_sin:
98+
NewI = translateMath(I, Intrinsic::sin);
99+
break;
100+
case GenXIntrinsic::genx_pow:
101+
NewI = translateMath(I, Intrinsic::pow);
102+
break;
87103
case GenXIntrinsic::genx_bf_cvt:
88104
NewI = translateBFloat16Convert(I);
89105
break;
@@ -139,6 +155,19 @@ void GenXTranslateIntrinsics::visitCallInst(CallInst &I) const {
139155
return;
140156
}
141157

158+
// Builds a call to the standard LLVM math intrinsic IID that mirrors the
// call I: the new intrinsic is overloaded on I's result type and receives
// I's arguments unchanged.
//
// I             - the call being translated; used as the IRBuilder anchor
//                 and as the source of the result type and arguments.
// IID           - ID of the LLVM intrinsic to emit.
// HasApproxFunc - value assigned to the new call's approximate-function
//                 fast-math flag.
//
// Returns the newly created call. This function does not rewrite the uses
// of I or erase it.
Value *GenXTranslateIntrinsics::translateMath(CallInst &I, Intrinsic::ID IID,
159+
bool HasApproxFunc) const {
160+
LLVM_DEBUG(dbgs() << "Translate: " << I << "\n");
161+
// Anchor the builder at the original call.
IRBuilder<> Builder(&I);
162+
163+
// Forward the original operands as-is; the only overloaded type passed to
// CreateIntrinsic is the call's result type.
SmallVector<Value *, 4> Args(I.args());
164+
auto *NewI = Builder.CreateIntrinsic(IID, {I.getType()}, Args);
165+
// Only the approx-func flag is set explicitly here; no other fast-math
// flags are copied from I in this function.
NewI->setHasApproxFunc(HasApproxFunc);
166+
167+
LLVM_DEBUG(dbgs() << "Created: " << *NewI << "\n");
168+
return NewI;
169+
}
170+
142171
Value *GenXTranslateIntrinsics::translateBFloat16Convert(CallInst &I) const {
143172
IGC_ASSERT_EXIT(GenXIntrinsic::getGenXIntrinsicID(&I) ==
144173
GenXIntrinsic::genx_bf_cvt);
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2023 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; COM: ;;;;;;;;;; RUNNERS ;;;;;;;;;;
10+
11+
; RUN: llc %s -march=genx64 -mcpu=XeHPG -vc-skip-ocl-runtime-info -finalizer-opts='-dumpcommonisa -isaasmToConsole' -o /dev/null \
12+
; RUN: | FileCheck %s
13+
14+
15+
; COM: ;;;;;;;;;; CHECKERS ;;;;;;;;;;
16+
17+
; CHECK: .decl [[SRC:V[^ ]+]] v_type=G type=hf num_elts=8
18+
; CHECK: cos (M1, 8) [[COS:V[^ ]+]](0,0)<1> [[SRC]](0,0)<1;1,0>
19+
; CHECK: exp (M1, 8) [[EXP:V[^ ]+]](0,0)<1> [[SRC]](0,0)<1;1,0>
20+
; CHECK: log (M1, 8) [[LOG:V[^ ]+]](0,0)<1> [[SRC]](0,0)<1;1,0>
21+
; CHECK: sin (M1, 8) [[SIN:V[^ ]+]](0,0)<1> [[SRC]](0,0)<1;1,0>
22+
23+
; COM: ;;;;;;;;;; KERNEL ;;;;;;;;;;
24+
25+
target datalayout = "e-p:64:64-i64:64-n8:16:32"
26+
target triple = "genx64-unknown-unknown"
27+
28+
declare <8 x half> @llvm.genx.oword.ld.v8f16(i32, i32, i32)
29+
declare void @llvm.genx.oword.st.v8f16(i32, i32, <8 x half>)
30+
31+
declare <8 x half> @llvm.cos.v8f16(<8 x half>)
32+
declare <8 x half> @llvm.exp2.v8f16(<8 x half>)
33+
declare <8 x half> @llvm.log2.v8f16(<8 x half>)
34+
declare <8 x half> @llvm.sin.v8f16(<8 x half>)
35+
36+
define dllexport spir_kernel void @math(i32 %0, i32 %1) local_unnamed_addr #0 {
37+
%src = tail call <8 x half> @llvm.genx.oword.ld.v8f16(i32 0, i32 %0, i32 0)
38+
39+
%cos = call afn <8 x half> @llvm.cos.v8f16(<8 x half> %src)
40+
%exp = call afn <8 x half> @llvm.exp2.v8f16(<8 x half> %src)
41+
%log = call afn <8 x half> @llvm.log2.v8f16(<8 x half> %src)
42+
%sin = call afn <8 x half> @llvm.sin.v8f16(<8 x half> %src)
43+
44+
tail call void @llvm.genx.oword.st.v8f16(i32 %1, i32 0, <8 x half> %cos)
45+
tail call void @llvm.genx.oword.st.v8f16(i32 %1, i32 2, <8 x half> %exp)
46+
tail call void @llvm.genx.oword.st.v8f16(i32 %1, i32 4, <8 x half> %log)
47+
tail call void @llvm.genx.oword.st.v8f16(i32 %1, i32 6, <8 x half> %sin)
48+
ret void
49+
}
50+
51+
attributes #0 = { noinline nounwind "CMGenxMain" }
52+
53+
!spirv.Source = !{!1}
54+
!opencl.spir.version = !{!2}
55+
!opencl.ocl.version = !{!1}
56+
!opencl.used.extensions = !{!0}
57+
!opencl.used.optional.core.features = !{!0}
58+
!spirv.Generator = !{!3}
59+
!genx.kernels = !{!4}
60+
!genx.kernel.internal = !{!8}
61+
62+
!0 = !{}
63+
!1 = !{i32 0, i32 0}
64+
!2 = !{i32 1, i32 2}
65+
!3 = !{i16 6, i16 14}
66+
!4 = !{void (i32, i32)* @math, !"math", !5, i32 0, !6, !1, !7, i32 0}
67+
!5 = !{i32 2, i32 2}
68+
!6 = !{i32 64, i32 68}
69+
!7 = !{!"buffer_t", !"buffer_t"}
70+
!8 = !{void (i32, i32)* @math, null, null, null, null}

0 commit comments

Comments
 (0)