Skip to content

Commit e842998

Browse files
AMDGPU/GlobalISel: Add test for fma_mix with source from unmerge (llvm#102129)
When selecting fma_mix with an operand that comes from G_UNMERGE_VALUES, there is a bug where the folded register is operand 0 of the G_UNMERGE_VALUES. Source modifiers are selected correctly. isExtractHiElt returns the G_UNMERGE_VALUES that defines the source register, but it does not specify which operand.
1 parent 6c8933e commit e842998

File tree

1 file changed

+37
-0
lines changed

1 file changed

+37
-0
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,43 @@ define amdgpu_vs <4 x float> @test_v4f16_v4f32_add_ext_fma_mul_rhs(<4 x float> %
441441
ret <4 x float> %d
442442
}
443443

444+
; Test for fma_mix selection when the multiplicands are elements of a loaded
; vector: a <4 x half> is loaded from addrspace(3), extended to <4 x float>,
; and elements 1 and 3 are multiplied and added to %b via llvm.fmuladd.f32.
; NOTE(review): every check block below selects v2 with op_sel high for both
; multiplicands even though the load writes v[2:3]; presumably element 3 should
; be read from v3, so these checks appear to record the current (buggy) output
; rather than the desired one - confirm before treating them as correct.
define amdgpu_ps float @test_matching_source_from_unmerge(ptr addrspace(3) %aptr, float %b) {
445+
; GFX9-DENORM-LABEL: test_matching_source_from_unmerge:
446+
; GFX9-DENORM: ; %bb.0: ; %.entry
447+
; GFX9-DENORM-NEXT: ds_read_b64 v[2:3], v0
448+
; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
449+
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
450+
; GFX9-DENORM-NEXT: ; return to shader part epilog
451+
;
452+
; GFX10-LABEL: test_matching_source_from_unmerge:
453+
; GFX10: ; %bb.0: ; %.entry
454+
; GFX10-NEXT: ds_read_b64 v[2:3], v0
455+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
456+
; GFX10-NEXT: v_fma_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
457+
; GFX10-NEXT: ; return to shader part epilog
458+
;
459+
; GFX10-CONTRACT-LABEL: test_matching_source_from_unmerge:
460+
; GFX10-CONTRACT: ; %bb.0: ; %.entry
461+
; GFX10-CONTRACT-NEXT: ds_read_b64 v[2:3], v0
462+
; GFX10-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
463+
; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
464+
; GFX10-CONTRACT-NEXT: ; return to shader part epilog
465+
;
466+
; GFX10-DENORM-LABEL: test_matching_source_from_unmerge:
467+
; GFX10-DENORM: ; %bb.0: ; %.entry
468+
; GFX10-DENORM-NEXT: ds_read_b64 v[2:3], v0
469+
; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
470+
; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
471+
; GFX10-DENORM-NEXT: ; return to shader part epilog
472+
.entry:
473+
%a = load <4 x half>, ptr addrspace(3) %aptr, align 16
474+
%a_f32 = fpext <4 x half> %a to <4 x float>
475+
%.a3_f32 = extractelement <4 x float> %a_f32, i64 3
476+
%.a1_f32 = extractelement <4 x float> %a_f32, i64 1
477+
%res = call float @llvm.fmuladd.f32(float %.a1_f32, float %.a3_f32, float %b)
478+
ret float %res
479+
}
480+
444481
declare float @llvm.fmuladd.f32(float, float, float) #0
445482
declare half @llvm.fmuladd.f16(half, half, half) #0
446483
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0

0 commit comments

Comments
 (0)