Skip to content

Commit 18183da

Browse files
fda0 authored and igcbot committed
Add sat argument to dp4a GenISA builtins - try 2
Add sat argument to dp4a internal builtins and GenISA builtins. Remove add_sat instructions. Propagate dp4a saturation data through the compiler.
1 parent 3b8b434 commit 18183da

File tree

7 files changed

+57
-67
lines changed

7 files changed

+57
-67
lines changed

IGC/BiFModule/Implementation/IGCBiF_Intrinsics.cl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -806,10 +806,10 @@ void __builtin_IB_media_block_write_ulong2(int image, int2 offset, int width, in
806806
void __builtin_IB_media_block_write_ulong4(int image, int2 offset, int width, int height, ulong4 pixels);
807807
void __builtin_IB_media_block_write_ulong8(int image, int2 offset, int width, int height, ulong8 pixels);
808808

809-
int __builtin_IB_dp4a_ss(int c, int a, int b) __attribute__((const));
810-
int __builtin_IB_dp4a_uu(int c, int a, int b) __attribute__((const));
811-
int __builtin_IB_dp4a_su(int c, int a, int b) __attribute__((const));
812-
int __builtin_IB_dp4a_us(int c, int a, int b) __attribute__((const));
809+
int __builtin_IB_dp4a_ss(int c, int a, int b, bool isSaturated) __attribute__((const));
810+
int __builtin_IB_dp4a_uu(int c, int a, int b, bool isSaturated) __attribute__((const));
811+
int __builtin_IB_dp4a_su(int c, int a, int b, bool isSaturated) __attribute__((const));
812+
int __builtin_IB_dp4a_us(int c, int a, int b, bool isSaturated) __attribute__((const));
813813

814814
#define DECL_SUB_GROUP_OPERATION(type, type_abbr, operation, group_type) \
815815
type __builtin_IB_sub_group_##group_type##_##operation##_##type_abbr(type x) __attribute__((const));

IGC/BiFModule/Languages/OpenCL/IBiF_Dot_Product.cl

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,13 @@ INLINE TYPE_RET OVERLOADABLE dot_acc_sat(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc)
3737
#define DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV(TYPE_RET, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW, TYPE_SUFFIX_IB) \
3838
TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b) \
3939
{ \
40-
return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b)); \
40+
return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b), false); \
4141
}
4242

43-
#define DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(TYPE_RET, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW, TYPE_SUFFIX_IB, SAT_PREFIX) \
44-
TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc) \
45-
{ \
46-
TYPE_RET product = __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b)); \
47-
return SPIRV_OCL_BUILTIN(SAT_PREFIX##_add_sat, _i32_i32,)(product, acc); \
43+
#define DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(TYPE_RET, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW, TYPE_SUFFIX_IB) \
44+
TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc) \
45+
{ \
46+
return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(acc, as_int(a), as_int(b), true); \
4847
}
4948

5049
#define DEFN_INTEL_DOT_PRODUCT_US(TYPE_RET, TYPE_ARG, MANGLING_OLD, MANGLING_NEW) \
@@ -62,14 +61,13 @@ INLINE TYPE_RET OVERLOADABLE dot_acc_sat(u##TYPE_ARG a, TYPE_ARG b, TYPE_RET acc
6261
#define DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(TYPE_RET, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW, TYPE_SUFFIX_IB) \
6362
TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_ARG1 packed) \
6463
{ \
65-
return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b)); \
64+
return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b), false); \
6665
}
6766

68-
#define DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(TYPE_RET, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW, TYPE_SUFFIX_IB, SAT_PREFIX) \
69-
TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc, TYPE_ARG1 packed) \
70-
{ \
71-
TYPE_RET product = __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b)); \
72-
return SPIRV_OCL_BUILTIN(SAT_PREFIX##_add_sat, _i32_i32,)(product, acc); \
67+
#define DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(TYPE_RET, TYPE_ARG1, TYPE_ARG2, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW, TYPE_SUFFIX_IB) \
68+
TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc, TYPE_ARG1 packed) \
69+
{ \
70+
return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(acc, as_int(a), as_int(b), true); \
7371
}
7472

7573
#define DEFN_INTEL_DOT_PRODUCT_PACKED(TYPE_RET, ARG_TYPES, TYPE_SUFFIX, MANGLING_OLD, MANGLING_NEW) \
@@ -105,12 +103,12 @@ DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(uint, uint, uint, U, _i32_i32_i32, _
105103
DEFN_INTEL_DOT_PRODUCT_PACKED(uint, uu, U, _i32_i32_i32, _Ruint)
106104
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
107105
#ifdef __opencl_c_integer_dot_product_saturation_accumulation
108-
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(uint, uchar4, uchar4, U, _v4i8_v4i8_i32, _Ruint, uu, u)
106+
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(uint, uchar4, uchar4, U, _v4i8_v4i8_i32, _Ruint, uu)
109107
DEFN_INTEL_DOT_PRODUCT_SAT(uint, uchar4, uchar4, U, _v4i8_v4i8_i32, _Ruint)
110-
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(uint, ushort2, ushort2, U, _v2i16_v2i16_i32, _Ruint, uu, u)
108+
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(uint, ushort2, ushort2, U, _v2i16_v2i16_i32, _Ruint, uu)
111109
DEFN_INTEL_DOT_PRODUCT_SAT(uint, ushort2, ushort2, U, _v2i16_v2i16_i32, _Ruint)
112110
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
113-
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(uint, uint, uint, U, _i32_i32_i32_i32, _Ruint, uu, u)
111+
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(uint, uint, uint, U, _i32_i32_i32_i32, _Ruint, uu)
114112
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED(uint, U, uu, _i32_i32_i32_i32, _Ruint)
115113
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
116114
#endif // __opencl_c_integer_dot_product_saturation_accumulation
@@ -125,12 +123,12 @@ DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(int, int, int, S, _i32_i32_i32, _Rin
125123
DEFN_INTEL_DOT_PRODUCT_PACKED(int, ss, S, _i32_i32_i32, _Rint)
126124
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
127125
#ifdef __opencl_c_integer_dot_product_saturation_accumulation
128-
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, char4, char4, S, _v4i8_v4i8_i32, _Rint, ss, s)
126+
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, char4, char4, S, _v4i8_v4i8_i32, _Rint, ss)
129127
DEFN_INTEL_DOT_PRODUCT_SAT(int, char4, char4, S, _v4i8_v4i8_i32, _Rint)
130-
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, short2, short2, S, _v2i16_v2i16_i32, _Rint, ss, s)
128+
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, short2, short2, S, _v2i16_v2i16_i32, _Rint, ss)
131129
DEFN_INTEL_DOT_PRODUCT_SAT(int, short2, short2, S, _v2i16_v2i16_i32, _Rint)
132130
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
133-
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(int, int, int, S, _i32_i32_i32_i32, _Rint, ss, s)
131+
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(int, int, int, S, _i32_i32_i32_i32, _Rint, ss)
134132
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED(int, S, ss, _i32_i32_i32_i32, _Rint)
135133
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
136134
#endif // __opencl_c_integer_dot_product_saturation_accumulation
@@ -149,14 +147,14 @@ DEFN_INTEL_DOT_PRODUCT_PACKED(int, su, SU, _i32_i32_i32, _Rint)
149147
DEFN_INTEL_DOT_PRODUCT_PACKED_US
150148
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
151149
#ifdef __opencl_c_integer_dot_product_saturation_accumulation
152-
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, char4, uchar4, SU, _v4i8_v4i8_i32, _Rint, su, s)
150+
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, char4, uchar4, SU, _v4i8_v4i8_i32, _Rint, su)
153151
DEFN_INTEL_DOT_PRODUCT_SAT(int, char4, uchar4, SU, _v4i8_v4i8_i32, _Rint)
154152
DEFN_INTEL_DOT_PRODUCT_SAT_US(int, char4, _v4i8_v4i8_i32, _Rint)
155-
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, short2, ushort2, SU, _v2i16_v2i16_i32, _Rint, su, s)
153+
DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV(int, short2, ushort2, SU, _v2i16_v2i16_i32, _Rint, su)
156154
DEFN_INTEL_DOT_PRODUCT_SAT(int, short2, ushort2, SU, _v2i16_v2i16_i32, _Rint)
157155
DEFN_INTEL_DOT_PRODUCT_SAT_US(int, short2, _v2i16_v2i16_i32, _Rint)
158156
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
159-
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(int, int, uint, SU, _i32_i32_i32_i32, _Rint, su, s)
157+
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV(int, int, uint, SU, _i32_i32_i32_i32, _Rint, su)
160158
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED(int, SU, su, _i32_i32_i32_i32, _Rint)
161159
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_US
162160
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8822,7 +8822,8 @@ void EmitPass::emitAluNoModifier(llvm::GenIntrinsicInst* inst)
88228822

88238823
void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
88248824
{
8825-
switch (inst->getIntrinsicID())
8825+
const GenISAIntrinsic::ID intrinsicID = inst->getIntrinsicID();
8826+
switch (intrinsicID)
88268827
{
88278828
case GenISAIntrinsic::GenISA_OUTPUT:
88288829
emitOutput(inst);
@@ -9410,8 +9411,15 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
94109411
case GenISAIntrinsic::GenISA_dp4a_uu:
94119412
case GenISAIntrinsic::GenISA_dp4a_su:
94129413
case GenISAIntrinsic::GenISA_dp4a_us:
9413-
emitDP4A(inst);
9414+
{
9415+
ConstantInt* constIsSaturated = cast<ConstantInt>(inst->getOperand(3));
9416+
DstModifier modifier;
9417+
modifier.sat = constIsSaturated->getValue().getBoolValue();
9418+
9419+
bool isAccSigned = intrinsicID != GenISAIntrinsic::GenISA_dp4a_uu;
9420+
emitDP4A(inst, nullptr, modifier, isAccSigned);
94149421
break;
9422+
}
94159423
case GenISAIntrinsic::GenISA_evaluateSampler:
94169424
// nothing to do
94179425
break;

IGC/Compiler/CISACodeGen/PatternMatchPass.cpp

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3737,19 +3737,6 @@ namespace IGC
37373737
}
37383738
};
37393739

3740-
// dp4a with modifiers
3741-
struct Dp4aSatPattern : Pattern
3742-
{
3743-
GenIntrinsicInst* inst;
3744-
bool isAccSigned;
3745-
virtual void Emit(EmitPass* pass, const DstModifier& modifier)
3746-
{
3747-
DstModifier mod = modifier;
3748-
mod.sat = true;
3749-
pass->emitDP4A(inst, nullptr, mod, isAccSigned);
3750-
}
3751-
};
3752-
37533740

37543741
bool match = false;
37553742
llvm::Value* source = nullptr;
@@ -3795,26 +3782,6 @@ namespace IGC
37953782
satPattern->src = GetSource(truncInst->getOperand(0), !isUnsigned, false, IsSourceOfSample(&I));
37963783
AddPattern(satPattern);
37973784
}
3798-
else if (llvm::GenIntrinsicInst * genIsaInst = llvm::dyn_cast<llvm::GenIntrinsicInst>(source);
3799-
genIsaInst &&
3800-
(genIsaInst->getIntrinsicID() == llvm::GenISAIntrinsic::ID::GenISA_dp4a_ss ||
3801-
genIsaInst->getIntrinsicID() == llvm::GenISAIntrinsic::ID::GenISA_dp4a_su ||
3802-
genIsaInst->getIntrinsicID() == llvm::GenISAIntrinsic::ID::GenISA_dp4a_uu ||
3803-
genIsaInst->getIntrinsicID() == llvm::GenISAIntrinsic::ID::GenISA_dp4a_us))
3804-
{
3805-
match = true;
3806-
3807-
uint numSources = GetNbSources(*sourceInst);
3808-
for (uint i = 0; i < numSources; i++)
3809-
{
3810-
MarkAsSource(sourceInst->getOperand(i), IsSourceOfSample(&I));
3811-
}
3812-
3813-
Dp4aSatPattern* dp4aSatPattern = new (m_allocator) Dp4aSatPattern();
3814-
dp4aSatPattern->inst = genIsaInst;
3815-
dp4aSatPattern->isAccSigned = !isUnsigned;
3816-
AddPattern(dp4aSatPattern);
3817-
}
38183785
else
38193786
{
38203787
IGC_ASSERT_MESSAGE(0, "An undefined pattern match");

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1464,9 +1464,10 @@ void CustomSafeOptPass::matchDp4a(BinaryOperator &I) {
14641464
}
14651465
Value* ValA = Builder.CreateBitCast(VectorA, Builder.getInt32Ty());
14661466
Value* ValB = Builder.CreateBitCast(VectorB, Builder.getInt32Ty());
1467+
Value* ValSat = Builder.getInt1(false);
14671468

14681469
Function* Dp4aFun = GenISAIntrinsic::getDeclaration(I.getModule(), IntrinsicID, Builder.getInt32Ty());
1469-
Value* Res = Builder.CreateCall(Dp4aFun, { AccVal, ValA, ValB });
1470+
Value* Res = Builder.CreateCall(Dp4aFun, { AccVal, ValA, ValB, ValSat });
14701471
I.replaceAllUsesWith(Res);
14711472
}
14721473

IGC/Compiler/tests/CustomSafeOptPass/dp4a.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ define i32 @test_dp4a_ss_noacc(i32 %src1, i32 %src2) {
3737
; CHECK: [[TMP8:%.*]] = insertelement <4 x i8> [[TMP6]], i8 [[H]], i64 3
3838
; CHECK: [[TMP9:%.*]] = bitcast <4 x i8> [[TMP7]] to i32
3939
; CHECK: [[TMP10:%.*]] = bitcast <4 x i8> [[TMP8]] to i32
40-
; CHECK: [[TMP11:%.*]] = call i32 @llvm.genx.GenISA.dp4a.ss.i32(i32 0, i32 [[TMP9]], i32 [[TMP10]])
40+
; CHECK: [[TMP11:%.*]] = call i32 @llvm.genx.GenISA.dp4a.ss.i32(i32 0, i32 [[TMP9]], i32 [[TMP10]], i1 false)
4141
; CHECK: ret i32 [[TMP11]]
4242
;
4343
%vec1 = bitcast i32 %src1 to <4 x i8>
@@ -90,7 +90,7 @@ define i32 @test_dp4a_ss_end(i32 %src1, i32 %src2, i32 %acc) {
9090
; CHECK: [[TMP8:%.*]] = insertelement <4 x i8> [[TMP6]], i8 [[H]], i64 3
9191
; CHECK: [[TMP9:%.*]] = bitcast <4 x i8> [[TMP7]] to i32
9292
; CHECK: [[TMP10:%.*]] = bitcast <4 x i8> [[TMP8]] to i32
93-
; CHECK: [[TMP11:%.*]] = call i32 @llvm.genx.GenISA.dp4a.ss.i32(i32 0, i32 [[TMP9]], i32 [[TMP10]])
93+
; CHECK: [[TMP11:%.*]] = call i32 @llvm.genx.GenISA.dp4a.ss.i32(i32 0, i32 [[TMP9]], i32 [[TMP10]], i1 false)
9494
; CHECK: [[TMP12:%.*]] = add i32 [[ACC:%.*]], [[TMP11]]
9595
; CHECK: ret i32 [[TMP12]]
9696
;
@@ -145,7 +145,7 @@ define i32 @test_dp4a_ss_forw(i32 %src1, i32 %src2, i32 %acc) {
145145
; CHECK: [[TMP8:%.*]] = insertelement <4 x i8> [[TMP6]], i8 [[H]], i64 3
146146
; CHECK: [[TMP9:%.*]] = bitcast <4 x i8> [[TMP7]] to i32
147147
; CHECK: [[TMP10:%.*]] = bitcast <4 x i8> [[TMP8]] to i32
148-
; CHECK: [[TMP11:%.*]] = call i32 @llvm.genx.GenISA.dp4a.ss.i32(i32 [[ACC:%.*]], i32 [[TMP9]], i32 [[TMP10]])
148+
; CHECK: [[TMP11:%.*]] = call i32 @llvm.genx.GenISA.dp4a.ss.i32(i32 [[ACC:%.*]], i32 [[TMP9]], i32 [[TMP10]], i1 false)
149149
; CHECK: ret i32 [[TMP11]]
150150
;
151151
%vec1 = bitcast i32 %src1 to <4 x i8>
@@ -199,7 +199,7 @@ define i32 @test_dp4a_us_noacc(i32 %src1, i32 %src2) {
199199
; CHECK: [[TMP8:%.*]] = insertelement <4 x i8> [[TMP6]], i8 [[H]], i64 3
200200
; CHECK: [[TMP9:%.*]] = bitcast <4 x i8> [[TMP7]] to i32
201201
; CHECK: [[TMP10:%.*]] = bitcast <4 x i8> [[TMP8]] to i32
202-
; CHECK: [[TMP11:%.*]] = call i32 @llvm.genx.GenISA.dp4a.us.i32(i32 0, i32 [[TMP9]], i32 [[TMP10]])
202+
; CHECK: [[TMP11:%.*]] = call i32 @llvm.genx.GenISA.dp4a.us.i32(i32 0, i32 [[TMP9]], i32 [[TMP10]], i1 false)
203203
; CHECK: ret i32 [[TMP11]]
204204
;
205205
%vec1 = bitcast i32 %src1 to <4 x i8>
@@ -253,7 +253,7 @@ define i32 @test_dp4a_su_noacc(i32 %src1, i32 %src2) {
253253
; CHECK: [[TMP8:%.*]] = insertelement <4 x i8> [[TMP6]], i8 [[H]], i64 3
254254
; CHECK: [[TMP9:%.*]] = bitcast <4 x i8> [[TMP7]] to i32
255255
; CHECK: [[TMP10:%.*]] = bitcast <4 x i8> [[TMP8]] to i32
256-
; CHECK: [[TMP11:%.*]] = call i32 @llvm.genx.GenISA.dp4a.su.i32(i32 0, i32 [[TMP9]], i32 [[TMP10]])
256+
; CHECK: [[TMP11:%.*]] = call i32 @llvm.genx.GenISA.dp4a.su.i32(i32 0, i32 [[TMP9]], i32 [[TMP10]], i1 false)
257257
; CHECK: ret i32 [[TMP11]]
258258
;
259259
%vec1 = bitcast i32 %src1 to <4 x i8>
@@ -306,7 +306,7 @@ define i32 @test_dp4a_uu_noacc(i32 %src1, i32 %src2) {
306306
; CHECK: [[TMP8:%.*]] = insertelement <4 x i8> [[TMP6]], i8 [[H]], i64 3
307307
; CHECK: [[TMP9:%.*]] = bitcast <4 x i8> [[TMP7]] to i32
308308
; CHECK: [[TMP10:%.*]] = bitcast <4 x i8> [[TMP8]] to i32
309-
; CHECK: [[TMP11:%.*]] = call i32 @llvm.genx.GenISA.dp4a.uu.i32(i32 0, i32 [[TMP9]], i32 [[TMP10]])
309+
; CHECK: [[TMP11:%.*]] = call i32 @llvm.genx.GenISA.dp4a.uu.i32(i32 0, i32 [[TMP9]], i32 [[TMP10]], i1 false)
310310
; CHECK: ret i32 [[TMP11]]
311311
;
312312
%vec1 = bitcast i32 %src1 to <4 x i8>

IGC/GenISAIntrinsics/generator/input/Intrinsic_definitions.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2872,6 +2872,10 @@ intrinsics:
28722872
name: Arg2
28732873
type_definition: *i32
28742874
comment: "b of char4"
2875+
- !<ArgumentDefinition>
2876+
name: Arg3
2877+
type_definition: *i1
2878+
comment: "bool isSaturated"
28752879
attributes:
28762880
- !AttributeID "NoUnwind"
28772881
memory_effects:
@@ -2896,6 +2900,10 @@ intrinsics:
28962900
name: Arg2
28972901
type_definition: *i32
28982902
comment: "b of uchar4"
2903+
- !<ArgumentDefinition>
2904+
name: Arg3
2905+
type_definition: *i1
2906+
comment: "bool isSaturated"
28992907
attributes:
29002908
- !AttributeID "NoUnwind"
29012909
memory_effects:
@@ -2920,6 +2928,10 @@ intrinsics:
29202928
name: Arg2
29212929
type_definition: *i32
29222930
comment: "b of char4"
2931+
- !<ArgumentDefinition>
2932+
name: Arg3
2933+
type_definition: *i1
2934+
comment: "bool isSaturated"
29232935
attributes:
29242936
- !AttributeID "NoUnwind"
29252937
memory_effects:
@@ -2944,6 +2956,10 @@ intrinsics:
29442956
name: Arg2
29452957
type_definition: *i32
29462958
comment: "b of uchar4"
2959+
- !<ArgumentDefinition>
2960+
name: Arg3
2961+
type_definition: *i1
2962+
comment: "bool isSaturated"
29472963
attributes:
29482964
- !AttributeID "NoUnwind"
29492965
memory_effects:

0 commit comments

Comments
 (0)