@@ -37,14 +37,13 @@ INLINE TYPE_RET OVERLOADABLE dot_acc_sat(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc)
37
37
#define DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV (TYPE_RET , TYPE_ARG1 , TYPE_ARG2 , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW , TYPE_SUFFIX_IB ) \
38
38
TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b) \
39
39
{ \
40
- return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b)); \
40
+ return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b), false); \
41
41
}
42
42
43
- #define DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (TYPE_RET , TYPE_ARG1 , TYPE_ARG2 , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW , TYPE_SUFFIX_IB , SAT_PREFIX ) \
44
- TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc) \
45
- { \
46
- TYPE_RET product = __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b)); \
47
- return SPIRV_OCL_BUILTIN(SAT_PREFIX##_add_sat, _i32_i32,)(product, acc); \
43
+ #define DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (TYPE_RET , TYPE_ARG1 , TYPE_ARG2 , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW , TYPE_SUFFIX_IB ) \
44
+ TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc) \
45
+ { \
46
+ return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(acc, as_int(a), as_int(b), true); \
48
47
}
49
48
50
49
#define DEFN_INTEL_DOT_PRODUCT_US (TYPE_RET , TYPE_ARG , MANGLING_OLD , MANGLING_NEW ) \
@@ -62,14 +61,13 @@ INLINE TYPE_RET OVERLOADABLE dot_acc_sat(u##TYPE_ARG a, TYPE_ARG b, TYPE_RET acc
62
61
#define DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV (TYPE_RET , TYPE_ARG1 , TYPE_ARG2 , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW , TYPE_SUFFIX_IB ) \
63
62
TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_ARG1 packed) \
64
63
{ \
65
- return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b)); \
64
+ return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b), false); \
66
65
}
67
66
68
- #define DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (TYPE_RET , TYPE_ARG1 , TYPE_ARG2 , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW , TYPE_SUFFIX_IB , SAT_PREFIX ) \
69
- TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc, TYPE_ARG1 packed) \
70
- { \
71
- TYPE_RET product = __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b)); \
72
- return SPIRV_OCL_BUILTIN(SAT_PREFIX##_add_sat, _i32_i32,)(product, acc); \
67
+ #define DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (TYPE_RET , TYPE_ARG1 , TYPE_ARG2 , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW , TYPE_SUFFIX_IB ) \
68
+ TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc, TYPE_ARG1 packed) \
69
+ { \
70
+ return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(acc, as_int(a), as_int(b), true); \
73
71
}
74
72
75
73
#define DEFN_INTEL_DOT_PRODUCT_PACKED (TYPE_RET , ARG_TYPES , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW ) \
@@ -105,12 +103,12 @@ DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(uint, uint, uint, U, _i32_i32_i32, _
105
103
DEFN_INTEL_DOT_PRODUCT_PACKED (uint , uu , U , _i32_i32_i32 , _Ruint )
106
104
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
107
105
#ifdef __opencl_c_integer_dot_product_saturation_accumulation
108
- DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (uint , uchar4 , uchar4 , U , _v4i8_v4i8_i32 , _Ruint , uu , u )
106
+ DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (uint , uchar4 , uchar4 , U , _v4i8_v4i8_i32 , _Ruint , uu )
109
107
DEFN_INTEL_DOT_PRODUCT_SAT (uint , uchar4 , uchar4 , U , _v4i8_v4i8_i32 , _Ruint )
110
- DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (uint , ushort2 , ushort2 , U , _v2i16_v2i16_i32 , _Ruint , uu , u )
108
+ DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (uint , ushort2 , ushort2 , U , _v2i16_v2i16_i32 , _Ruint , uu )
111
109
DEFN_INTEL_DOT_PRODUCT_SAT (uint , ushort2 , ushort2 , U , _v2i16_v2i16_i32 , _Ruint )
112
110
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
113
- DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (uint , uint , uint , U , _i32_i32_i32_i32 , _Ruint , uu , u )
111
+ DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (uint , uint , uint , U , _i32_i32_i32_i32 , _Ruint , uu )
114
112
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED (uint , U , uu , _i32_i32_i32_i32 , _Ruint )
115
113
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
116
114
#endif // __opencl_c_integer_dot_product_saturation_accumulation
@@ -125,12 +123,12 @@ DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(int, int, int, S, _i32_i32_i32, _Rin
125
123
DEFN_INTEL_DOT_PRODUCT_PACKED (int , ss , S , _i32_i32_i32 , _Rint )
126
124
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
127
125
#ifdef __opencl_c_integer_dot_product_saturation_accumulation
128
- DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , char4 , char4 , S , _v4i8_v4i8_i32 , _Rint , ss , s )
126
+ DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , char4 , char4 , S , _v4i8_v4i8_i32 , _Rint , ss )
129
127
DEFN_INTEL_DOT_PRODUCT_SAT (int , char4 , char4 , S , _v4i8_v4i8_i32 , _Rint )
130
- DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , short2 , short2 , S , _v2i16_v2i16_i32 , _Rint , ss , s )
128
+ DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , short2 , short2 , S , _v2i16_v2i16_i32 , _Rint , ss )
131
129
DEFN_INTEL_DOT_PRODUCT_SAT (int , short2 , short2 , S , _v2i16_v2i16_i32 , _Rint )
132
130
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
133
- DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (int , int , int , S , _i32_i32_i32_i32 , _Rint , ss , s )
131
+ DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (int , int , int , S , _i32_i32_i32_i32 , _Rint , ss )
134
132
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED (int , S , ss , _i32_i32_i32_i32 , _Rint )
135
133
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
136
134
#endif // __opencl_c_integer_dot_product_saturation_accumulation
@@ -149,14 +147,14 @@ DEFN_INTEL_DOT_PRODUCT_PACKED(int, su, SU, _i32_i32_i32, _Rint)
149
147
DEFN_INTEL_DOT_PRODUCT_PACKED_US
150
148
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
151
149
#ifdef __opencl_c_integer_dot_product_saturation_accumulation
152
- DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , char4 , uchar4 , SU , _v4i8_v4i8_i32 , _Rint , su , s )
150
+ DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , char4 , uchar4 , SU , _v4i8_v4i8_i32 , _Rint , su )
153
151
DEFN_INTEL_DOT_PRODUCT_SAT (int , char4 , uchar4 , SU , _v4i8_v4i8_i32 , _Rint )
154
152
DEFN_INTEL_DOT_PRODUCT_SAT_US (int , char4 , _v4i8_v4i8_i32 , _Rint )
155
- DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , short2 , ushort2 , SU , _v2i16_v2i16_i32 , _Rint , su , s )
153
+ DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , short2 , ushort2 , SU , _v2i16_v2i16_i32 , _Rint , su )
156
154
DEFN_INTEL_DOT_PRODUCT_SAT (int , short2 , ushort2 , SU , _v2i16_v2i16_i32 , _Rint )
157
155
DEFN_INTEL_DOT_PRODUCT_SAT_US (int , short2 , _v2i16_v2i16_i32 , _Rint )
158
156
#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
159
- DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (int , int , uint , SU , _i32_i32_i32_i32 , _Rint , su , s )
157
+ DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (int , int , uint , SU , _i32_i32_i32_i32 , _Rint , su )
160
158
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED (int , SU , su , _i32_i32_i32_i32 , _Rint )
161
159
DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_US
162
160
#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
0 commit comments