@@ -22,28 +22,28 @@ def CC_SI_Gfx : CallingConv<[
22
22
// 32 is reserved for the stack pointer
23
23
// 33 is reserved for the frame pointer
24
24
// 34 is reserved for the base pointer
25
- CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
25
+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<[
26
26
SGPR4, SGPR5, SGPR6, SGPR7,
27
27
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
28
28
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
29
29
SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29
30
30
]>>>,
31
31
32
- CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
32
+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<[
33
33
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
34
34
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
35
35
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
36
36
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
37
37
]>>>,
38
38
39
- CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>
39
+ CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16 ], CCAssignToStack<4, 4>>
40
40
]>;
41
41
42
42
def RetCC_SI_Gfx : CallingConv<[
43
43
CCIfType<[i1], CCPromoteToType<i32>>,
44
44
CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
45
45
46
- CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
46
+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<[
47
47
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
48
48
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
49
49
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -66,7 +66,7 @@ def RetCC_SI_Gfx : CallingConv<[
66
66
67
67
def CC_SI_SHADER : CallingConv<[
68
68
69
- CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
69
+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<[
70
70
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
71
71
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
72
72
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
@@ -76,7 +76,7 @@ def CC_SI_SHADER : CallingConv<[
76
76
]>>>,
77
77
78
78
// 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
79
- CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
79
+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<[
80
80
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
81
81
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
82
82
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -109,7 +109,7 @@ def RetCC_SI_Shader : CallingConv<[
109
109
]>>,
110
110
111
111
// 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
112
- CCIfType<[f32, f16, v2f16] , CCAssignToReg<[
112
+ CCIfType<[f32, f16, v2f16, bf16, v2bf16 ] , CCAssignToReg<[
113
113
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
114
114
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
115
115
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -188,23 +188,23 @@ def CC_AMDGPU_Func : CallingConv<[
188
188
CCIfType<[i1], CCPromoteToType<i32>>,
189
189
CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
190
190
191
- CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
191
+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<
192
192
!foreach(i, !range(0, 30), !cast<Register>("SGPR"#i)) // SGPR0-29
193
193
>>>,
194
194
195
- CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[
195
+ CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1, bf16, v2bf16 ], CCAssignToReg<[
196
196
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
197
197
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
198
198
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
199
199
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
200
- CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>
200
+ CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16 ], CCAssignToStack<4, 4>>
201
201
]>;
202
202
203
203
// Calling convention for leaf functions
204
204
def RetCC_AMDGPU_Func : CallingConv<[
205
205
CCIfType<[i1], CCPromoteToType<i32>>,
206
206
CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
207
- CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[
207
+ CCIfType<[i32, f32, i16, f16, v2i16, v2f16, bf16, v2bf16 ], CCAssignToReg<[
208
208
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
209
209
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
210
210
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -223,11 +223,11 @@ def CC_AMDGPU : CallingConv<[
223
223
]>;
224
224
225
225
// Argument assignment rules for CC_AMDGPU_CS_CHAIN.
//
// Two rules, both restricted to the scalar/packed types listed in CCIfType:
//   * arguments marked `inreg` are assigned from SGPR0..SGPR104
//     (!range(105) yields indices 0 through 104);
//   * arguments not marked `inreg` are assigned from VGPR8..VGPR254
//     (!range(8, 255) yields indices 8 through 254), so VGPR0..VGPR7 are
//     never handed out by this convention.
//
// There is no CCAssignToStack rule in this definition: once the register
// lists are exhausted, no rule here matches.
// NOTE(review): presumably VGPR0-7 are skipped because they are reserved for
// another purpose in this convention -- confirm against the AMDGPU docs.
def CC_AMDGPU_CS_CHAIN : CallingConv<[
226
- CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
226
+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<
227
227
!foreach(i, !range(105), !cast<Register>("SGPR"#i))
228
228
>>>,
229
229
230
- CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
230
+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16 ] , CCAssignToReg<
231
231
!foreach(i, !range(8, 255), !cast<Register>("VGPR"#i))
232
232
>>>
233
233
]>;
0 commit comments