@@ -33,14 +33,12 @@ gpu.module @kernels {
33
33
// CHECK-SUB: %[[V2:.+]] = vector.insert %[[R2]], %[[V1]] [4] : f16 into vector<5xf16>
34
34
// CHECK-SUB: "test.consume"(%[[V2]]) : (vector<5xf16>) -> ()
35
35
// CHECK-DPP-COUNT-6: amdgpu.dpp
36
- // CHECK-DPP: rocdl.readlane
37
36
%sum0 = gpu.subgroup_reduce add %arg0 : (vector <5 xf16 >) -> (vector <5 xf16 >)
38
37
" test.consume" (%sum0 ) : (vector <5 xf16 >) -> ()
39
38
40
39
// CHECK-SUB-COUNT-3: gpu.subgroup_reduce mul {{.+}} uniform
41
40
// CHECK-SUB: "test.consume"
42
41
// CHECK-DPP-COUNT-6: amdgpu.dpp
43
- // CHECK-DPP: rocdl.readlane
44
42
%sum1 = gpu.subgroup_reduce mul %arg0 uniform : (vector <5 xf16 >) -> (vector <5 xf16 >)
45
43
" test.consume" (%sum1 ) : (vector <5 xf16 >) -> ()
46
44
@@ -71,14 +69,12 @@ gpu.module @kernels {
71
69
// CHECK-SUB: %[[V0:.+]] = vector.broadcast %[[R0]] : f32 to vector<1xf32>
72
70
// CHECK-SUB: "test.consume"(%[[V0]]) : (vector<1xf32>) -> ()
73
71
// CHECK-DPP-COUNT-6: amdgpu.dpp
74
- // CHECK-DPP: rocdl.readlane
75
72
%sum0 = gpu.subgroup_reduce add %arg0 : (vector <1 xf32 >) -> (vector <1 xf32 >)
76
73
" test.consume" (%sum0 ) : (vector <1 xf32 >) -> ()
77
74
78
75
// CHECK-SUB: gpu.subgroup_reduce add {{.+}} uniform : (f32) -> f32
79
76
// CHECK-SUB: "test.consume"
80
77
// CHECK-DPP-COUNT-6: amdgpu.dpp
81
- // CHECK-DPP: rocdl.readlane
82
78
%sum1 = gpu.subgroup_reduce add %arg0 uniform : (vector <1 xf32 >) -> (vector <1 xf32 >)
83
79
" test.consume" (%sum1 ) : (vector <1 xf32 >) -> ()
84
80
@@ -148,7 +144,6 @@ gpu.module @kernels {
148
144
// CHECK-SHFL: "test.consume"(%[[A4]]) : (i32) -> ()
149
145
150
146
// CHECK-DPP-COUNT-6: amdgpu.dpp
151
- // CHECK-DPP: rocdl.readlane
152
147
%sum0 = gpu.subgroup_reduce add %arg0 : (i32 ) -> i32
153
148
" test.consume" (%sum0 ) : (i32 ) -> ()
154
149
@@ -282,7 +277,6 @@ gpu.module @kernels {
282
277
// CHECK-SHFL: %[[AL:.+]] = arith.addi {{.+}} : i16
283
278
// CHECK-SHFL: "test.consume"(%[[AL]]) : (i16) -> ()
284
279
// CHECK-DPP-COUNT-6: amdgpu.dpp
285
- // CHECK-DPP: rocdl.readlane
286
280
%sum0 = gpu.subgroup_reduce add %arg0 : (i16 ) -> i16
287
281
" test.consume" (%sum0 ) : (i16 ) -> ()
288
282
0 commit comments