Skip to content

Commit 1233e2b

Browse files
committed
Update test
1 parent a686b6b commit 1233e2b

File tree

1 file changed

+66
-90
lines changed

1 file changed

+66
-90
lines changed

llvm/test/CodeGen/AMDGPU/wave32.ll

Lines changed: 66 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -2568,22 +2568,23 @@ if.end2: ; preds = %if.end
25682568
define amdgpu_kernel void @fcmp64(float %n, float %s) {
25692569
; GFX1032-LABEL: fcmp64:
25702570
; GFX1032: ; %bb.0: ; %entry
2571-
; GFX1032-NEXT: s_load_dword s0, s[4:5], 0x28
2571+
; GFX1032-NEXT: s_load_dword s1, s[4:5], 0x28
25722572
; GFX1032-NEXT: v_cvt_f32_u32_e32 v0, v0
25732573
; GFX1032-NEXT: ; implicit-def: $vgpr1
25742574
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
2575-
; GFX1032-NEXT: v_cmp_ngt_f32_e64 s1, v0, |s0|
2576-
; GFX1032-NEXT: s_and_saveexec_b32 s2, s1
2577-
; GFX1032-NEXT: s_xor_b32 s1, exec_lo, s2
2575+
; GFX1032-NEXT: v_cmp_ngt_f32_e64 s0, v0, |s1|
2576+
; GFX1032-NEXT: s_and_saveexec_b32 s2, s0
2577+
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s2
25782578
; GFX1032-NEXT: ; %bb.1: ; %frem.else
25792579
; GFX1032-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, v0
2580-
; GFX1032-NEXT: v_cmp_eq_f32_e64 vcc_lo, v0, |s0|
2580+
; GFX1032-NEXT: v_cmp_eq_f32_e64 vcc_lo, v0, |s1|
25812581
; GFX1032-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo
2582+
; GFX1032-NEXT: ; implicit-def: $vgpr0
25822583
; GFX1032-NEXT: ; %bb.2: ; %Flow13
2583-
; GFX1032-NEXT: s_andn2_saveexec_b32 s1, s1
2584+
; GFX1032-NEXT: s_andn2_saveexec_b32 s0, s0
25842585
; GFX1032-NEXT: s_cbranch_execz .LBB51_8
25852586
; GFX1032-NEXT: ; %bb.3: ; %frem.compute
2586-
; GFX1032-NEXT: v_frexp_mant_f32_e64 v1, |s0|
2587+
; GFX1032-NEXT: v_frexp_mant_f32_e64 v1, |s1|
25872588
; GFX1032-NEXT: v_frexp_exp_i32_f32_e32 v7, v0
25882589
; GFX1032-NEXT: v_frexp_mant_f32_e32 v8, v0
25892590
; GFX1032-NEXT: v_ldexp_f32 v1, v1, 1
@@ -2595,19 +2596,19 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
25952596
; GFX1032-NEXT: v_mul_f32_e32 v4, v5, v3
25962597
; GFX1032-NEXT: v_fma_f32 v6, -v2, v4, v5
25972598
; GFX1032-NEXT: v_fmac_f32_e32 v4, v6, v3
2598-
; GFX1032-NEXT: v_frexp_exp_i32_f32_e32 v6, s0
2599+
; GFX1032-NEXT: v_frexp_exp_i32_f32_e32 v6, s1
25992600
; GFX1032-NEXT: v_fma_f32 v5, -v2, v4, v5
26002601
; GFX1032-NEXT: v_add_nc_u32_e32 v2, -1, v6
26012602
; GFX1032-NEXT: v_div_fmas_f32 v3, v5, v3, v4
26022603
; GFX1032-NEXT: v_xad_u32 v4, v2, -1, v7
26032604
; GFX1032-NEXT: v_ldexp_f32 v5, v8, 12
26042605
; GFX1032-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
26052606
; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, 12, v4
2606-
; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo
2607+
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
26072608
; GFX1032-NEXT: s_cbranch_execz .LBB51_7
26082609
; GFX1032-NEXT: ; %bb.4: ; %frem.loop_body.preheader
26092610
; GFX1032-NEXT: v_sub_nc_u32_e32 v4, v7, v6
2610-
; GFX1032-NEXT: s_mov_b32 s3, 0
2611+
; GFX1032-NEXT: s_mov_b32 s2, 0
26112612
; GFX1032-NEXT: v_add_nc_u32_e32 v4, 12, v4
26122613
; GFX1032-NEXT: .LBB51_5: ; %frem.loop_body
26132614
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2621,15 +2622,16 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
26212622
; GFX1032-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
26222623
; GFX1032-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4
26232624
; GFX1032-NEXT: v_ldexp_f32 v5, v5, 12
2624-
; GFX1032-NEXT: s_or_b32 s3, vcc_lo, s3
2625-
; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s3
2625+
; GFX1032-NEXT: s_or_b32 s2, vcc_lo, s2
2626+
; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
26262627
; GFX1032-NEXT: s_cbranch_execnz .LBB51_5
26272628
; GFX1032-NEXT: ; %bb.6: ; %Flow
2628-
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s3
2629+
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2
26292630
; GFX1032-NEXT: v_mov_b32_e32 v5, v6
26302631
; GFX1032-NEXT: .LBB51_7: ; %Flow12
2631-
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2
2632+
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s1
26322633
; GFX1032-NEXT: v_add_nc_u32_e32 v4, -11, v4
2634+
; GFX1032-NEXT: v_and_b32_e32 v0, 0x80000000, v0
26332635
; GFX1032-NEXT: v_ldexp_f32 v4, v5, v4
26342636
; GFX1032-NEXT: v_mul_f32_e32 v3, v4, v3
26352637
; GFX1032-NEXT: v_rndne_f32_e32 v3, v3
@@ -2638,21 +2640,13 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
26382640
; GFX1032-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v3
26392641
; GFX1032-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
26402642
; GFX1032-NEXT: v_ldexp_f32 v1, v1, v2
2641-
; GFX1032-NEXT: v_and_b32_e32 v2, 0x80000000, v0
2642-
; GFX1032-NEXT: v_xor_b32_e32 v1, v2, v1
2643+
; GFX1032-NEXT: v_xor_b32_e32 v1, v0, v1
26432644
; GFX1032-NEXT: .LBB51_8: ; %Flow14
2644-
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s1
2645-
; GFX1032-NEXT: v_cmp_class_f32_e64 s1, s0, 3
2646-
; GFX1032-NEXT: v_cmp_class_f32_e64 s0, s0, 0x60
2647-
; GFX1032-NEXT: v_cmp_class_f32_e64 s2, v0, 0x1f8
2648-
; GFX1032-NEXT: v_cndmask_b32_e64 v0, v1, 0x7fc00000, s0
2649-
; GFX1032-NEXT: s_xor_b32 s0, s1, -1
2645+
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0
2646+
; GFX1032-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v1
26502647
; GFX1032-NEXT: s_brev_b32 s1, 1
2651-
; GFX1032-NEXT: s_and_b32 vcc_lo, s0, s2
2652-
; GFX1032-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
2653-
; GFX1032-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
26542648
; GFX1032-NEXT: s_lshr_b32 s0, vcc_lo, 1
2655-
; GFX1032-NEXT: v_cmp_nlg_f32_e32 vcc_lo, 0, v0
2649+
; GFX1032-NEXT: v_cmp_nlg_f32_e32 vcc_lo, 0, v1
26562650
; GFX1032-NEXT: s_ff1_i32_b64 s0, s[0:1]
26572651
; GFX1032-NEXT: s_cmp_gt_u32 s0, 9
26582652
; GFX1032-NEXT: s_cselect_b32 s0, -1, 0
@@ -2665,34 +2659,35 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
26652659
;
26662660
; GFX1064-LABEL: fcmp64:
26672661
; GFX1064: ; %bb.0: ; %entry
2668-
; GFX1064-NEXT: s_load_dword s6, s[4:5], 0x28
2662+
; GFX1064-NEXT: s_load_dword s2, s[4:5], 0x28
26692663
; GFX1064-NEXT: v_cvt_f32_u32_e32 v0, v0
26702664
; GFX1064-NEXT: ; implicit-def: $vgpr1
26712665
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
2672-
; GFX1064-NEXT: v_cmp_ngt_f32_e64 s[0:1], v0, |s6|
2673-
; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
2674-
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[2:3]
2666+
; GFX1064-NEXT: v_cmp_ngt_f32_e64 s[0:1], v0, |s2|
2667+
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], s[0:1]
2668+
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[4:5]
26752669
; GFX1064-NEXT: ; %bb.1: ; %frem.else
26762670
; GFX1064-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, v0
2677-
; GFX1064-NEXT: v_cmp_eq_f32_e64 vcc, v0, |s6|
2671+
; GFX1064-NEXT: v_cmp_eq_f32_e64 vcc, v0, |s2|
26782672
; GFX1064-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
2673+
; GFX1064-NEXT: ; implicit-def: $vgpr0
26792674
; GFX1064-NEXT: ; %bb.2: ; %Flow13
26802675
; GFX1064-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
26812676
; GFX1064-NEXT: s_cbranch_execz .LBB51_8
26822677
; GFX1064-NEXT: ; %bb.3: ; %frem.compute
2683-
; GFX1064-NEXT: v_frexp_mant_f32_e64 v1, |s6|
2678+
; GFX1064-NEXT: v_frexp_mant_f32_e64 v1, |s2|
26842679
; GFX1064-NEXT: v_frexp_exp_i32_f32_e32 v7, v0
26852680
; GFX1064-NEXT: v_frexp_mant_f32_e32 v8, v0
26862681
; GFX1064-NEXT: v_ldexp_f32 v1, v1, 1
2687-
; GFX1064-NEXT: v_div_scale_f32 v2, s[2:3], v1, v1, 1.0
2682+
; GFX1064-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, 1.0
26882683
; GFX1064-NEXT: v_div_scale_f32 v5, vcc, 1.0, v1, 1.0
26892684
; GFX1064-NEXT: v_rcp_f32_e32 v3, v2
26902685
; GFX1064-NEXT: v_fma_f32 v4, -v2, v3, 1.0
26912686
; GFX1064-NEXT: v_fmac_f32_e32 v3, v4, v3
26922687
; GFX1064-NEXT: v_mul_f32_e32 v4, v5, v3
26932688
; GFX1064-NEXT: v_fma_f32 v6, -v2, v4, v5
26942689
; GFX1064-NEXT: v_fmac_f32_e32 v4, v6, v3
2695-
; GFX1064-NEXT: v_frexp_exp_i32_f32_e32 v6, s6
2690+
; GFX1064-NEXT: v_frexp_exp_i32_f32_e32 v6, s2
26962691
; GFX1064-NEXT: v_fma_f32 v5, -v2, v4, v5
26972692
; GFX1064-NEXT: v_add_nc_u32_e32 v2, -1, v6
26982693
; GFX1064-NEXT: v_div_fmas_f32 v3, v5, v3, v4
@@ -2727,6 +2722,7 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
27272722
; GFX1064-NEXT: .LBB51_7: ; %Flow12
27282723
; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3]
27292724
; GFX1064-NEXT: v_add_nc_u32_e32 v4, -11, v4
2725+
; GFX1064-NEXT: v_and_b32_e32 v0, 0x80000000, v0
27302726
; GFX1064-NEXT: v_ldexp_f32 v4, v5, v4
27312727
; GFX1064-NEXT: v_mul_f32_e32 v3, v4, v3
27322728
; GFX1064-NEXT: v_rndne_f32_e32 v3, v3
@@ -2735,20 +2731,12 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
27352731
; GFX1064-NEXT: v_cmp_gt_f32_e32 vcc, 0, v3
27362732
; GFX1064-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
27372733
; GFX1064-NEXT: v_ldexp_f32 v1, v1, v2
2738-
; GFX1064-NEXT: v_and_b32_e32 v2, 0x80000000, v0
2739-
; GFX1064-NEXT: v_xor_b32_e32 v1, v2, v1
2734+
; GFX1064-NEXT: v_xor_b32_e32 v1, v0, v1
27402735
; GFX1064-NEXT: .LBB51_8: ; %Flow14
27412736
; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1]
2742-
; GFX1064-NEXT: v_cmp_class_f32_e64 s[4:5], s6, 0x60
2743-
; GFX1064-NEXT: v_cmp_class_f32_e64 s[0:1], s6, 3
2744-
; GFX1064-NEXT: v_cmp_class_f32_e64 s[2:3], v0, 0x1f8
2745-
; GFX1064-NEXT: v_cndmask_b32_e64 v0, v1, 0x7fc00000, s[4:5]
2746-
; GFX1064-NEXT: s_xor_b64 s[0:1], s[0:1], -1
2747-
; GFX1064-NEXT: s_and_b64 vcc, s[0:1], s[2:3]
2748-
; GFX1064-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc
2749-
; GFX1064-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
2737+
; GFX1064-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
27502738
; GFX1064-NEXT: s_lshr_b64 s[0:1], vcc, 1
2751-
; GFX1064-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v0
2739+
; GFX1064-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v1
27522740
; GFX1064-NEXT: s_bitset1_b32 s1, 31
27532741
; GFX1064-NEXT: s_ff1_i32_b64 s0, s[0:1]
27542742
; GFX1064-NEXT: s_cmp_gt_u32 s0, 9
@@ -2884,22 +2872,23 @@ if.end2: ; preds = %if.end
28842872
define amdgpu_kernel void @fcmp32(float %n, float %s) {
28852873
; GFX1032-LABEL: fcmp32:
28862874
; GFX1032: ; %bb.0: ; %entry
2887-
; GFX1032-NEXT: s_load_dword s0, s[4:5], 0x28
2875+
; GFX1032-NEXT: s_load_dword s1, s[4:5], 0x28
28882876
; GFX1032-NEXT: v_cvt_f32_u32_e32 v0, v0
28892877
; GFX1032-NEXT: ; implicit-def: $vgpr1
28902878
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
2891-
; GFX1032-NEXT: v_cmp_ngt_f32_e64 s1, v0, |s0|
2892-
; GFX1032-NEXT: s_and_saveexec_b32 s2, s1
2893-
; GFX1032-NEXT: s_xor_b32 s1, exec_lo, s2
2879+
; GFX1032-NEXT: v_cmp_ngt_f32_e64 s0, v0, |s1|
2880+
; GFX1032-NEXT: s_and_saveexec_b32 s2, s0
2881+
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s2
28942882
; GFX1032-NEXT: ; %bb.1: ; %frem.else
28952883
; GFX1032-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, v0
2896-
; GFX1032-NEXT: v_cmp_eq_f32_e64 vcc_lo, v0, |s0|
2884+
; GFX1032-NEXT: v_cmp_eq_f32_e64 vcc_lo, v0, |s1|
28972885
; GFX1032-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo
2886+
; GFX1032-NEXT: ; implicit-def: $vgpr0
28982887
; GFX1032-NEXT: ; %bb.2: ; %Flow13
2899-
; GFX1032-NEXT: s_andn2_saveexec_b32 s1, s1
2888+
; GFX1032-NEXT: s_andn2_saveexec_b32 s0, s0
29002889
; GFX1032-NEXT: s_cbranch_execz .LBB53_8
29012890
; GFX1032-NEXT: ; %bb.3: ; %frem.compute
2902-
; GFX1032-NEXT: v_frexp_mant_f32_e64 v1, |s0|
2891+
; GFX1032-NEXT: v_frexp_mant_f32_e64 v1, |s1|
29032892
; GFX1032-NEXT: v_frexp_exp_i32_f32_e32 v7, v0
29042893
; GFX1032-NEXT: v_frexp_mant_f32_e32 v8, v0
29052894
; GFX1032-NEXT: v_ldexp_f32 v1, v1, 1
@@ -2911,19 +2900,19 @@ define amdgpu_kernel void @fcmp32(float %n, float %s) {
29112900
; GFX1032-NEXT: v_mul_f32_e32 v4, v5, v3
29122901
; GFX1032-NEXT: v_fma_f32 v6, -v2, v4, v5
29132902
; GFX1032-NEXT: v_fmac_f32_e32 v4, v6, v3
2914-
; GFX1032-NEXT: v_frexp_exp_i32_f32_e32 v6, s0
2903+
; GFX1032-NEXT: v_frexp_exp_i32_f32_e32 v6, s1
29152904
; GFX1032-NEXT: v_fma_f32 v5, -v2, v4, v5
29162905
; GFX1032-NEXT: v_add_nc_u32_e32 v2, -1, v6
29172906
; GFX1032-NEXT: v_div_fmas_f32 v3, v5, v3, v4
29182907
; GFX1032-NEXT: v_xad_u32 v4, v2, -1, v7
29192908
; GFX1032-NEXT: v_ldexp_f32 v5, v8, 12
29202909
; GFX1032-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
29212910
; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, 12, v4
2922-
; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo
2911+
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
29232912
; GFX1032-NEXT: s_cbranch_execz .LBB53_7
29242913
; GFX1032-NEXT: ; %bb.4: ; %frem.loop_body.preheader
29252914
; GFX1032-NEXT: v_sub_nc_u32_e32 v4, v7, v6
2926-
; GFX1032-NEXT: s_mov_b32 s3, 0
2915+
; GFX1032-NEXT: s_mov_b32 s2, 0
29272916
; GFX1032-NEXT: v_add_nc_u32_e32 v4, 12, v4
29282917
; GFX1032-NEXT: .LBB53_5: ; %frem.loop_body
29292918
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2937,15 +2926,16 @@ define amdgpu_kernel void @fcmp32(float %n, float %s) {
29372926
; GFX1032-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
29382927
; GFX1032-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4
29392928
; GFX1032-NEXT: v_ldexp_f32 v5, v5, 12
2940-
; GFX1032-NEXT: s_or_b32 s3, vcc_lo, s3
2941-
; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s3
2929+
; GFX1032-NEXT: s_or_b32 s2, vcc_lo, s2
2930+
; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
29422931
; GFX1032-NEXT: s_cbranch_execnz .LBB53_5
29432932
; GFX1032-NEXT: ; %bb.6: ; %Flow
2944-
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s3
2933+
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2
29452934
; GFX1032-NEXT: v_mov_b32_e32 v5, v6
29462935
; GFX1032-NEXT: .LBB53_7: ; %Flow12
2947-
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2
2936+
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s1
29482937
; GFX1032-NEXT: v_add_nc_u32_e32 v4, -11, v4
2938+
; GFX1032-NEXT: v_and_b32_e32 v0, 0x80000000, v0
29492939
; GFX1032-NEXT: v_ldexp_f32 v4, v5, v4
29502940
; GFX1032-NEXT: v_mul_f32_e32 v3, v4, v3
29512941
; GFX1032-NEXT: v_rndne_f32_e32 v3, v3
@@ -2954,20 +2944,12 @@ define amdgpu_kernel void @fcmp32(float %n, float %s) {
29542944
; GFX1032-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v3
29552945
; GFX1032-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
29562946
; GFX1032-NEXT: v_ldexp_f32 v1, v1, v2
2957-
; GFX1032-NEXT: v_and_b32_e32 v2, 0x80000000, v0
2958-
; GFX1032-NEXT: v_xor_b32_e32 v1, v2, v1
2947+
; GFX1032-NEXT: v_xor_b32_e32 v1, v0, v1
29592948
; GFX1032-NEXT: .LBB53_8: ; %Flow14
2960-
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s1
2961-
; GFX1032-NEXT: v_cmp_class_f32_e64 s1, s0, 3
2962-
; GFX1032-NEXT: v_cmp_class_f32_e64 s0, s0, 0x60
2963-
; GFX1032-NEXT: v_cmp_class_f32_e64 s2, v0, 0x1f8
2964-
; GFX1032-NEXT: v_cndmask_b32_e64 v0, v1, 0x7fc00000, s0
2965-
; GFX1032-NEXT: s_xor_b32 s0, s1, -1
2966-
; GFX1032-NEXT: s_and_b32 vcc_lo, s0, s2
2967-
; GFX1032-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
2968-
; GFX1032-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
2949+
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0
2950+
; GFX1032-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v1
29692951
; GFX1032-NEXT: s_lshr_b32 s0, vcc_lo, 1
2970-
; GFX1032-NEXT: v_cmp_nlg_f32_e32 vcc_lo, 0, v0
2952+
; GFX1032-NEXT: v_cmp_nlg_f32_e32 vcc_lo, 0, v1
29712953
; GFX1032-NEXT: s_bitset1_b32 s0, 31
29722954
; GFX1032-NEXT: s_ff1_i32_b32 s0, s0
29732955
; GFX1032-NEXT: s_cmp_gt_u32 s0, 9
@@ -2981,34 +2963,35 @@ define amdgpu_kernel void @fcmp32(float %n, float %s) {
29812963
;
29822964
; GFX1064-LABEL: fcmp32:
29832965
; GFX1064: ; %bb.0: ; %entry
2984-
; GFX1064-NEXT: s_load_dword s6, s[4:5], 0x28
2966+
; GFX1064-NEXT: s_load_dword s2, s[4:5], 0x28
29852967
; GFX1064-NEXT: v_cvt_f32_u32_e32 v0, v0
29862968
; GFX1064-NEXT: ; implicit-def: $vgpr1
29872969
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
2988-
; GFX1064-NEXT: v_cmp_ngt_f32_e64 s[0:1], v0, |s6|
2989-
; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
2990-
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[2:3]
2970+
; GFX1064-NEXT: v_cmp_ngt_f32_e64 s[0:1], v0, |s2|
2971+
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], s[0:1]
2972+
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[4:5]
29912973
; GFX1064-NEXT: ; %bb.1: ; %frem.else
29922974
; GFX1064-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, v0
2993-
; GFX1064-NEXT: v_cmp_eq_f32_e64 vcc, v0, |s6|
2975+
; GFX1064-NEXT: v_cmp_eq_f32_e64 vcc, v0, |s2|
29942976
; GFX1064-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
2977+
; GFX1064-NEXT: ; implicit-def: $vgpr0
29952978
; GFX1064-NEXT: ; %bb.2: ; %Flow13
29962979
; GFX1064-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
29972980
; GFX1064-NEXT: s_cbranch_execz .LBB53_8
29982981
; GFX1064-NEXT: ; %bb.3: ; %frem.compute
2999-
; GFX1064-NEXT: v_frexp_mant_f32_e64 v1, |s6|
2982+
; GFX1064-NEXT: v_frexp_mant_f32_e64 v1, |s2|
30002983
; GFX1064-NEXT: v_frexp_exp_i32_f32_e32 v7, v0
30012984
; GFX1064-NEXT: v_frexp_mant_f32_e32 v8, v0
30022985
; GFX1064-NEXT: v_ldexp_f32 v1, v1, 1
3003-
; GFX1064-NEXT: v_div_scale_f32 v2, s[2:3], v1, v1, 1.0
2986+
; GFX1064-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, 1.0
30042987
; GFX1064-NEXT: v_div_scale_f32 v5, vcc, 1.0, v1, 1.0
30052988
; GFX1064-NEXT: v_rcp_f32_e32 v3, v2
30062989
; GFX1064-NEXT: v_fma_f32 v4, -v2, v3, 1.0
30072990
; GFX1064-NEXT: v_fmac_f32_e32 v3, v4, v3
30082991
; GFX1064-NEXT: v_mul_f32_e32 v4, v5, v3
30092992
; GFX1064-NEXT: v_fma_f32 v6, -v2, v4, v5
30102993
; GFX1064-NEXT: v_fmac_f32_e32 v4, v6, v3
3011-
; GFX1064-NEXT: v_frexp_exp_i32_f32_e32 v6, s6
2994+
; GFX1064-NEXT: v_frexp_exp_i32_f32_e32 v6, s2
30122995
; GFX1064-NEXT: v_fma_f32 v5, -v2, v4, v5
30132996
; GFX1064-NEXT: v_add_nc_u32_e32 v2, -1, v6
30142997
; GFX1064-NEXT: v_div_fmas_f32 v3, v5, v3, v4
@@ -3043,6 +3026,7 @@ define amdgpu_kernel void @fcmp32(float %n, float %s) {
30433026
; GFX1064-NEXT: .LBB53_7: ; %Flow12
30443027
; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3]
30453028
; GFX1064-NEXT: v_add_nc_u32_e32 v4, -11, v4
3029+
; GFX1064-NEXT: v_and_b32_e32 v0, 0x80000000, v0
30463030
; GFX1064-NEXT: v_ldexp_f32 v4, v5, v4
30473031
; GFX1064-NEXT: v_mul_f32_e32 v3, v4, v3
30483032
; GFX1064-NEXT: v_rndne_f32_e32 v3, v3
@@ -3051,20 +3035,12 @@ define amdgpu_kernel void @fcmp32(float %n, float %s) {
30513035
; GFX1064-NEXT: v_cmp_gt_f32_e32 vcc, 0, v3
30523036
; GFX1064-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
30533037
; GFX1064-NEXT: v_ldexp_f32 v1, v1, v2
3054-
; GFX1064-NEXT: v_and_b32_e32 v2, 0x80000000, v0
3055-
; GFX1064-NEXT: v_xor_b32_e32 v1, v2, v1
3038+
; GFX1064-NEXT: v_xor_b32_e32 v1, v0, v1
30563039
; GFX1064-NEXT: .LBB53_8: ; %Flow14
30573040
; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1]
3058-
; GFX1064-NEXT: v_cmp_class_f32_e64 s[4:5], s6, 0x60
3059-
; GFX1064-NEXT: v_cmp_class_f32_e64 s[0:1], s6, 3
3060-
; GFX1064-NEXT: v_cmp_class_f32_e64 s[2:3], v0, 0x1f8
3061-
; GFX1064-NEXT: v_cndmask_b32_e64 v0, v1, 0x7fc00000, s[4:5]
3062-
; GFX1064-NEXT: s_xor_b64 s[0:1], s[0:1], -1
3063-
; GFX1064-NEXT: s_and_b64 vcc, s[0:1], s[2:3]
3064-
; GFX1064-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc
3065-
; GFX1064-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
3041+
; GFX1064-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
30663042
; GFX1064-NEXT: s_lshr_b32 s0, vcc_lo, 1
3067-
; GFX1064-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v0
3043+
; GFX1064-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v1
30683044
; GFX1064-NEXT: s_bitset1_b32 s0, 31
30693045
; GFX1064-NEXT: s_ff1_i32_b32 s0, s0
30703046
; GFX1064-NEXT: s_cmp_gt_u32 s0, 9

0 commit comments

Comments
 (0)