@@ -175,14 +175,12 @@ define half @v_fdiv_f16(half %a, half %b) {
175
175
; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176
176
; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
177
177
; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
178
- ; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
179
- ; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
180
178
; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
181
179
; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
182
180
; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
183
- ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v6 , -v4 , v3, v5 op_sel_hi:[1,0,1]
184
- ; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v6 , v2
185
- ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4 , v3, v5 op_sel_hi:[1,0,1]
181
+ ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4 , -v1 , v3, v0 op_sel_hi:[1,0,1]
182
+ ; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v4 , v2
183
+ ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v1 , v3, v0 op_sel_hi:[1,0,1]
186
184
; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
187
185
; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
188
186
; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
@@ -213,14 +211,12 @@ define half @v_fdiv_f16(half %a, half %b) {
213
211
; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214
212
; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
215
213
; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
216
- ; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
217
- ; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
218
214
; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
219
215
; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
220
216
; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
221
- ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v6 , -v4 , v3, v5 op_sel_hi:[1,0,1]
222
- ; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v6 , v2
223
- ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4 , v3, v5 op_sel_hi:[1,0,1]
217
+ ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4 , -v1 , v3, v0 op_sel_hi:[1,0,1]
218
+ ; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v4 , v2
219
+ ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v1 , v3, v0 op_sel_hi:[1,0,1]
224
220
; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
225
221
; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
226
222
; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
@@ -491,14 +487,12 @@ define half @v_fdiv_f16_ulp25(half %a, half %b) {
491
487
; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492
488
; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
493
489
; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
494
- ; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
495
- ; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
496
490
; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
497
491
; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
498
492
; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
499
- ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v6 , -v4 , v3, v5 op_sel_hi:[1,0,1]
500
- ; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v6 , v2
501
- ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4 , v3, v5 op_sel_hi:[1,0,1]
493
+ ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4 , -v1 , v3, v0 op_sel_hi:[1,0,1]
494
+ ; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v4 , v2
495
+ ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v1 , v3, v0 op_sel_hi:[1,0,1]
502
496
; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
503
497
; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
504
498
; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
@@ -529,14 +523,12 @@ define half @v_fdiv_f16_ulp25(half %a, half %b) {
529
523
; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530
524
; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
531
525
; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
532
- ; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
533
- ; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
534
526
; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
535
527
; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
536
528
; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
537
- ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v6 , -v4 , v3, v5 op_sel_hi:[1,0,1]
538
- ; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v6 , v2
539
- ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4 , v3, v5 op_sel_hi:[1,0,1]
529
+ ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4 , -v1 , v3, v0 op_sel_hi:[1,0,1]
530
+ ; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v4 , v2
531
+ ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v1 , v3, v0 op_sel_hi:[1,0,1]
540
532
; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
541
533
; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
542
534
; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
0 commit comments