@@ -49,7 +49,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16(<8 x bfloat> %arg0, <8 x
49
49
; GCN-NEXT: v_mov_b32_e32 v9, s13
50
50
; GCN-NEXT: v_mov_b32_e32 v10, s14
51
51
; GCN-NEXT: v_mov_b32_e32 v11, s15
52
- ; GCN-NEXT: s_nop 3
52
+ ; GCN-NEXT: s_nop 4
53
53
; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
54
54
; GCN-NEXT: s_waitcnt vmcnt(0)
55
55
; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -122,7 +122,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__flags(<8 x bfloat> %arg0
122
122
; GCN-NEXT: v_mov_b32_e32 v9, s13
123
123
; GCN-NEXT: v_mov_b32_e32 v10, s14
124
124
; GCN-NEXT: v_mov_b32_e32 v11, s15
125
- ; GCN-NEXT: s_nop 3
125
+ ; GCN-NEXT: s_nop 4
126
126
; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
127
127
; GCN-NEXT: s_waitcnt vmcnt(0)
128
128
; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -179,7 +179,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac(<8 x bfloat> %arg0, <8 x b
179
179
; GCN-NEXT: s_nop 1
180
180
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
181
181
; GCN-NEXT: s_nop 7
182
- ; GCN-NEXT: s_nop 2
182
+ ; GCN-NEXT: s_nop 3
183
183
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
184
184
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
185
185
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -224,7 +224,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac__flags(<8 x bfloat> %arg0,
224
224
; GCN-NEXT: s_nop 1
225
225
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
226
226
; GCN-NEXT: s_nop 7
227
- ; GCN-NEXT: s_nop 2
227
+ ; GCN-NEXT: s_nop 3
228
228
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
229
229
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
230
230
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -396,7 +396,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac(<8 x bfloat>
396
396
; GCN-NEXT: v_mov_b32_e32 v16, 0
397
397
; GCN-NEXT: s_waitcnt lgkmcnt(0)
398
398
; GCN-NEXT: s_nop 7
399
- ; GCN-NEXT: s_nop 0
399
+ ; GCN-NEXT: s_nop 1
400
400
; GCN-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
401
401
; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
402
402
; GCN-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
@@ -431,7 +431,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac_flags(<8 x bf
431
431
; GCN-NEXT: v_mov_b32_e32 v16, 0
432
432
; GCN-NEXT: s_waitcnt lgkmcnt(0)
433
433
; GCN-NEXT: s_nop 7
434
- ; GCN-NEXT: s_nop 0
434
+ ; GCN-NEXT: s_nop 1
435
435
; GCN-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
436
436
; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
437
437
; GCN-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
0 commit comments