11
11
; GFX8-NOOPT: s_nop 1
12
12
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
13
13
; Kernel under test: calls the i32 update.dpp intrinsic on the two i32 kernel
; arguments with 1 for each of the three i32 immediates, then stores the
; result through %out.
define amdgpu_kernel void @dpp_test (ptr addrspace (1 ) %out , i32 %in1 , i32 %in2 ) {
14
; The removed/added pair below differs only in how the trailing i1 immediate
; is spelled (0 becomes false); every other operand is unchanged.
- %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32 (i32 %in1 , i32 %in2 , i32 1 , i32 1 , i32 1 , i1 0 ) #0
14
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32 (i32 %in1 , i32 %in2 , i32 1 , i32 1 , i32 1 , i1 false ) #0
15
15
store i32 %tmp0 , ptr addrspace (1 ) %out
16
16
ret void
17
17
}
@@ -24,7 +24,7 @@ define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
24
24
; GFX8-NOOPT: s_nop 1
25
25
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1{{$}}
26
26
; Kernel under test for the bound_ctrl case: the third immediate is 2 and the
; trailing i1 is set. The check line directly above this define expects
; quad_perm:[2,0,0,0] followed by a bound_ctrl:1 suffix.
define amdgpu_kernel void @dpp_test_bc (ptr addrspace (1 ) %out , i32 %in1 , i32 %in2 ) {
27
; The removed/added pair below differs only in how the trailing i1 immediate
; is spelled (1 becomes true).
- %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32 (i32 %in1 , i32 %in2 , i32 2 , i32 1 , i32 1 , i1 1 ) #0
27
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32 (i32 %in1 , i32 %in2 , i32 2 , i32 1 , i32 1 , i1 true ) #0
28
28
store i32 %tmp0 , ptr addrspace (1 ) %out
29
29
ret void
30
30
}
@@ -63,7 +63,7 @@ define amdgpu_kernel void @update_dpp64_test(ptr addrspace(1) %arg, i64 %in1, i6
63
63
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
64
64
%gep = getelementptr inbounds i64 , ptr addrspace (1 ) %arg , i32 %id
65
65
%load = load i64 , ptr addrspace (1 ) %gep
66
- %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64 (i64 %in1 , i64 %load , i32 1 , i32 1 , i32 1 , i1 0 ) #0
66
+ %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64 (i64 %in1 , i64 %load , i32 1 , i32 1 , i32 1 , i1 false ) #0
67
67
store i64 %tmp0 , ptr addrspace (1 ) %gep
68
68
ret void
69
69
}
@@ -83,7 +83,7 @@ define amdgpu_kernel void @update_dpp64_imm_old_test(ptr addrspace(1) %arg, i64
83
83
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
84
84
%gep = getelementptr inbounds i64 , ptr addrspace (1 ) %arg , i32 %id
85
85
%load = load i64 , ptr addrspace (1 ) %gep
86
- %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64 (i64 123451234512345 , i64 %load , i32 1 , i32 1 , i32 1 , i1 0 ) #0
86
+ %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64 (i64 123451234512345 , i64 %load , i32 1 , i32 1 , i32 1 , i1 false ) #0
87
87
store i64 %tmp0 , ptr addrspace (1 ) %gep
88
88
ret void
89
89
}
@@ -98,14 +98,133 @@ define amdgpu_kernel void @update_dpp64_imm_old_test(ptr addrspace(1) %arg, i64
98
98
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
99
99
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
100
100
; Kernel under test for the i64 intrinsic with a constant second operand
; (123451234512345) and %in1 as the first operand. The two check lines
; directly above this define each expect a v_mov_b32_dpp, one reading SRC_LO
; and one reading SRC_HI.
define amdgpu_kernel void @update_dpp64_imm_src_test (ptr addrspace (1 ) %out , i64 %in1 ) {
101
; The removed/added pair below differs only in how the trailing i1 immediate
; is spelled (0 becomes false).
- %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64 (i64 %in1 , i64 123451234512345 , i32 1 , i32 1 , i32 1 , i1 0 ) #0
101
+ %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64 (i64 %in1 , i64 123451234512345 , i32 1 , i32 1 , i32 1 , i1 false ) #0
102
102
store i64 %tmp0 , ptr addrspace (1 ) %out
103
103
ret void
104
104
}
105
105
106
; Newly added test: float counterpart of the basic update.dpp kernel. The call
; passes 1 for each i32 immediate and false for the i1. The final check expects
; quad_perm:[1,0,0,0] with row_mask:0x1 and bank_mask:0x1, and the trailing
; end-of-line anchor means no bound_ctrl suffix may follow.
+ ; GCN-LABEL: {{^}}dpp_test_f32:
107
+ ; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
108
+ ; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
109
+ ; GFX8-OPT: s_mov
110
+ ; GFX8-OPT: s_mov
111
+ ; GFX8-NOOPT: s_nop 1
112
+ ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
113
+ define amdgpu_kernel void @dpp_test_f32 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
114
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 1 , i32 1 , i32 1 , i1 false )
115
+ store float %tmp0 , ptr addrspace (1 ) %out
116
+ ret void
117
+ }
118
+
119
; Newly added test: immediate combination with all three i32 immediates equal
; to 0 and the i1 false. The final check expects an all-zero quad_perm with
; row_mask:0x0 and bank_mask:0x0, and no bound_ctrl suffix.
+ ; GCN-LABEL: {{^}}dpp_test_f32_imm_comb1:
120
+ ; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
121
+ ; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
122
+ ; GFX8-OPT: s_mov
123
+ ; GFX8-OPT: s_mov
124
+ ; GFX8-NOOPT: s_nop 1
125
+ ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,0] row_mask:0x0 bank_mask:0x0{{$}}
126
+ define amdgpu_kernel void @dpp_test_f32_imm_comb1 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
127
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 0 , i32 0 , i32 0 , i1 false )
128
+ store float %tmp0 , ptr addrspace (1 ) %out
129
+ ret void
130
+ }
131
+
132
; Newly added test: immediate combination with all three i32 immediates equal
; to 3 and the i1 false. The final check expects quad_perm:[3,0,0,0] with
; row_mask:0x3 and bank_mask:0x3, and no bound_ctrl suffix.
+ ; GCN-LABEL: {{^}}dpp_test_f32_imm_comb2:
133
+ ; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
134
+ ; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
135
+ ; GFX8-OPT: s_mov
136
+ ; GFX8-OPT: s_mov
137
+ ; GFX8-NOOPT: s_nop 1
138
+ ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,0,0,0] row_mask:0x3 bank_mask:0x3{{$}}
139
+ define amdgpu_kernel void @dpp_test_f32_imm_comb2 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
140
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 3 , i32 3 , i32 3 , i1 false )
141
+ store float %tmp0 , ptr addrspace (1 ) %out
142
+ ret void
143
+ }
144
+
145
; Newly added test: the three i32 immediates are distinct (1, 2, 3) and the i1
; is true, so the expected line shows which operand feeds which field: the
; first immediate appears as quad_perm:[1,0,0,0], the second as row_mask:0x2,
; the third as bank_mask:0x3, and the i1 as the bound_ctrl:1 suffix.
+ ; GCN-LABEL: {{^}}dpp_test_f32_imm_comb3:
146
+ ; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
147
+ ; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
148
+ ; GFX8-OPT: s_mov
149
+ ; GFX8-OPT: s_mov
150
+ ; GFX8-NOOPT: s_nop 1
151
+ ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x2 bank_mask:0x3 bound_ctrl:1{{$}}
152
+ define amdgpu_kernel void @dpp_test_f32_imm_comb3 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
153
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 1 , i32 2 , i32 3 , i1 true )
154
+ store float %tmp0 , ptr addrspace (1 ) %out
155
+ ret void
156
+ }
157
+
158
; Newly added test: immediates 4, 3, 2 with the i1 true. The value 4 is
; expected to print as quad_perm:[0,1,0,0], i.e. it lands in the second
; quad_perm slot (two bits per slot). The masks are expected as row_mask:0x3
; and bank_mask:0x2, followed by bound_ctrl:1.
+ ; GCN-LABEL: {{^}}dpp_test_f32_imm_comb4:
159
+ ; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
160
+ ; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
161
+ ; GFX8-OPT: s_mov
162
+ ; GFX8-OPT: s_mov
163
+ ; GFX8-NOOPT: s_nop 1
164
+ ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,1,0,0] row_mask:0x3 bank_mask:0x2 bound_ctrl:1{{$}}
165
+ define amdgpu_kernel void @dpp_test_f32_imm_comb4 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
166
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 4 , i32 3 , i32 2 , i1 true )
167
+ store float %tmp0 , ptr addrspace (1 ) %out
168
+ ret void
169
+ }
170
+
171
; Newly added test: immediates 63, 62, 61 with the i1 true. The value 63 is
; expected to print as quad_perm:[3,3,3,0]. The expected masks are 0xe and
; 0xd, which are the low four bits of 62 and 61 respectively, so this case
; covers mask immediates wider than four bits.
+ ; GCN-LABEL: {{^}}dpp_test_f32_imm_comb5:
172
+ ; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
173
+ ; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
174
+ ; GFX8-OPT: s_mov
175
+ ; GFX8-OPT: s_mov
176
+ ; GFX8-NOOPT: s_nop 1
177
+ ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
178
+ define amdgpu_kernel void @dpp_test_f32_imm_comb5 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
179
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 63 , i32 62 , i32 61 , i1 true )
180
+ store float %tmp0 , ptr addrspace (1 ) %out
181
+ ret void
182
+ }
183
+
184
; Newly added test: all three i32 immediates equal to 63 with the i1 true.
; The final check expects quad_perm:[3,3,3,0] and both masks printed as 0xf
; (the low four bits of 63), followed by bound_ctrl:1.
+ ; GCN-LABEL: {{^}}dpp_test_f32_imm_comb6:
185
+ ; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
186
+ ; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
187
+ ; GFX8-OPT: s_mov
188
+ ; GFX8-OPT: s_mov
189
+ ; GFX8-NOOPT: s_nop 1
190
+ ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
191
+ define amdgpu_kernel void @dpp_test_f32_imm_comb6 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
192
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 63 , i32 63 , i32 63 , i1 true )
193
+ store float %tmp0 , ptr addrspace (1 ) %out
194
+ ret void
195
+ }
196
+
197
+
198
; Newly added test: all three i32 immediates equal to 64 with the i1 true.
; The value 64 is expected to print as quad_perm:[0,0,0,1], i.e. only the
; fourth quad_perm slot is non-zero. Both masks are expected as 0x0, since 64
; has none of its low four bits set.
+ ; GCN-LABEL: {{^}}dpp_test_f32_imm_comb7:
199
+ ; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
200
+ ; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
201
+ ; GFX8-OPT: s_mov
202
+ ; GFX8-OPT: s_mov
203
+ ; GFX8-NOOPT: s_nop 1
204
+ ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
205
+ define amdgpu_kernel void @dpp_test_f32_imm_comb7 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
206
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 64 , i32 64 , i32 64 , i1 true )
207
+ store float %tmp0 , ptr addrspace (1 ) %out
208
+ ret void
209
+ }
210
+
211
; Newly added test: immediates 31, 63, 128 with the i1 true. The value 31 is
; expected to print as quad_perm:[3,3,1,0]. The masks are expected as 0xf
; (low four bits of 63) and 0x0 (128 has none of its low four bits set),
; followed by bound_ctrl:1.
+ ; GCN-LABEL: {{^}}dpp_test_f32_imm_comb8:
212
+ ; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
213
+ ; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
214
+ ; GFX8-OPT: s_mov
215
+ ; GFX8-OPT: s_mov
216
+ ; GFX8-NOOPT: s_nop 1
217
+ ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
218
+ define amdgpu_kernel void @dpp_test_f32_imm_comb8 (ptr addrspace (1 ) %out , float %in1 , float %in2 ) {
219
+ %tmp0 = call float @llvm.amdgcn.update.dpp.f32 (float %in1 , float %in2 , i32 31 , i32 63 , i32 128 , i1 true )
220
+ store float %tmp0 , ptr addrspace (1 ) %out
221
+ ret void
222
+ }
223
+
106
224
; Intrinsic declarations followed by the definition of attribute group #0.
; The float overload of update.dpp is the one declaration added by this
; change; it takes the same i32, i32, i32, i1 immediate tail as the i32 and
; i64 overloads, and all three update.dpp declarations carry #0.
declare i32 @llvm.amdgcn.workitem.id.x ()
107
225
declare void @llvm.amdgcn.s.barrier ()
108
226
declare i32 @llvm.amdgcn.update.dpp.i32 (i32 , i32 , i32 , i32 , i32 , i1 ) #0
227
+ declare float @llvm.amdgcn.update.dpp.f32 (float , float , i32 , i32 , i32 , i1 ) #0
109
228
declare i64 @llvm.amdgcn.update.dpp.i64 (i64 , i64 , i32 , i32 , i32 , i1 ) #0
110
229
111
230
attributes #0 = { nounwind readnone convergent }
0 commit comments