@@ -2568,22 +2568,23 @@ if.end2: ; preds = %if.end
2568
2568
define amdgpu_kernel void @fcmp64 (float %n , float %s ) {
2569
2569
; GFX1032-LABEL: fcmp64:
2570
2570
; GFX1032: ; %bb.0: ; %entry
2571
- ; GFX1032-NEXT: s_load_dword s0 , s[4:5], 0x28
2571
+ ; GFX1032-NEXT: s_load_dword s1 , s[4:5], 0x28
2572
2572
; GFX1032-NEXT: v_cvt_f32_u32_e32 v0, v0
2573
2573
; GFX1032-NEXT: ; implicit-def: $vgpr1
2574
2574
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
2575
- ; GFX1032-NEXT: v_cmp_ngt_f32_e64 s1 , v0, |s0 |
2576
- ; GFX1032-NEXT: s_and_saveexec_b32 s2, s1
2577
- ; GFX1032-NEXT: s_xor_b32 s1 , exec_lo, s2
2575
+ ; GFX1032-NEXT: v_cmp_ngt_f32_e64 s0 , v0, |s1 |
2576
+ ; GFX1032-NEXT: s_and_saveexec_b32 s2, s0
2577
+ ; GFX1032-NEXT: s_xor_b32 s0 , exec_lo, s2
2578
2578
; GFX1032-NEXT: ; %bb.1: ; %frem.else
2579
2579
; GFX1032-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, v0
2580
- ; GFX1032-NEXT: v_cmp_eq_f32_e64 vcc_lo, v0, |s0 |
2580
+ ; GFX1032-NEXT: v_cmp_eq_f32_e64 vcc_lo, v0, |s1 |
2581
2581
; GFX1032-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo
2582
+ ; GFX1032-NEXT: ; implicit-def: $vgpr0
2582
2583
; GFX1032-NEXT: ; %bb.2: ; %Flow13
2583
- ; GFX1032-NEXT: s_andn2_saveexec_b32 s1, s1
2584
+ ; GFX1032-NEXT: s_andn2_saveexec_b32 s0, s0
2584
2585
; GFX1032-NEXT: s_cbranch_execz .LBB51_8
2585
2586
; GFX1032-NEXT: ; %bb.3: ; %frem.compute
2586
- ; GFX1032-NEXT: v_frexp_mant_f32_e64 v1, |s0 |
2587
+ ; GFX1032-NEXT: v_frexp_mant_f32_e64 v1, |s1 |
2587
2588
; GFX1032-NEXT: v_frexp_exp_i32_f32_e32 v7, v0
2588
2589
; GFX1032-NEXT: v_frexp_mant_f32_e32 v8, v0
2589
2590
; GFX1032-NEXT: v_ldexp_f32 v1, v1, 1
@@ -2595,19 +2596,19 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
2595
2596
; GFX1032-NEXT: v_mul_f32_e32 v4, v5, v3
2596
2597
; GFX1032-NEXT: v_fma_f32 v6, -v2, v4, v5
2597
2598
; GFX1032-NEXT: v_fmac_f32_e32 v4, v6, v3
2598
- ; GFX1032-NEXT: v_frexp_exp_i32_f32_e32 v6, s0
2599
+ ; GFX1032-NEXT: v_frexp_exp_i32_f32_e32 v6, s1
2599
2600
; GFX1032-NEXT: v_fma_f32 v5, -v2, v4, v5
2600
2601
; GFX1032-NEXT: v_add_nc_u32_e32 v2, -1, v6
2601
2602
; GFX1032-NEXT: v_div_fmas_f32 v3, v5, v3, v4
2602
2603
; GFX1032-NEXT: v_xad_u32 v4, v2, -1, v7
2603
2604
; GFX1032-NEXT: v_ldexp_f32 v5, v8, 12
2604
2605
; GFX1032-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
2605
2606
; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, 12, v4
2606
- ; GFX1032-NEXT: s_and_saveexec_b32 s2 , vcc_lo
2607
+ ; GFX1032-NEXT: s_and_saveexec_b32 s1 , vcc_lo
2607
2608
; GFX1032-NEXT: s_cbranch_execz .LBB51_7
2608
2609
; GFX1032-NEXT: ; %bb.4: ; %frem.loop_body.preheader
2609
2610
; GFX1032-NEXT: v_sub_nc_u32_e32 v4, v7, v6
2610
- ; GFX1032-NEXT: s_mov_b32 s3 , 0
2611
+ ; GFX1032-NEXT: s_mov_b32 s2 , 0
2611
2612
; GFX1032-NEXT: v_add_nc_u32_e32 v4, 12, v4
2612
2613
; GFX1032-NEXT: .LBB51_5: ; %frem.loop_body
2613
2614
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2621,15 +2622,16 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
2621
2622
; GFX1032-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2622
2623
; GFX1032-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4
2623
2624
; GFX1032-NEXT: v_ldexp_f32 v5, v5, 12
2624
- ; GFX1032-NEXT: s_or_b32 s3 , vcc_lo, s3
2625
- ; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s3
2625
+ ; GFX1032-NEXT: s_or_b32 s2 , vcc_lo, s2
2626
+ ; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
2626
2627
; GFX1032-NEXT: s_cbranch_execnz .LBB51_5
2627
2628
; GFX1032-NEXT: ; %bb.6: ; %Flow
2628
- ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s3
2629
+ ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2
2629
2630
; GFX1032-NEXT: v_mov_b32_e32 v5, v6
2630
2631
; GFX1032-NEXT: .LBB51_7: ; %Flow12
2631
- ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2
2632
+ ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s1
2632
2633
; GFX1032-NEXT: v_add_nc_u32_e32 v4, -11, v4
2634
+ ; GFX1032-NEXT: v_and_b32_e32 v0, 0x80000000, v0
2633
2635
; GFX1032-NEXT: v_ldexp_f32 v4, v5, v4
2634
2636
; GFX1032-NEXT: v_mul_f32_e32 v3, v4, v3
2635
2637
; GFX1032-NEXT: v_rndne_f32_e32 v3, v3
@@ -2638,21 +2640,13 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
2638
2640
; GFX1032-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v3
2639
2641
; GFX1032-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
2640
2642
; GFX1032-NEXT: v_ldexp_f32 v1, v1, v2
2641
- ; GFX1032-NEXT: v_and_b32_e32 v2, 0x80000000, v0
2642
- ; GFX1032-NEXT: v_xor_b32_e32 v1, v2, v1
2643
+ ; GFX1032-NEXT: v_xor_b32_e32 v1, v0, v1
2643
2644
; GFX1032-NEXT: .LBB51_8: ; %Flow14
2644
- ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s1
2645
- ; GFX1032-NEXT: v_cmp_class_f32_e64 s1, s0, 3
2646
- ; GFX1032-NEXT: v_cmp_class_f32_e64 s0, s0, 0x60
2647
- ; GFX1032-NEXT: v_cmp_class_f32_e64 s2, v0, 0x1f8
2648
- ; GFX1032-NEXT: v_cndmask_b32_e64 v0, v1, 0x7fc00000, s0
2649
- ; GFX1032-NEXT: s_xor_b32 s0, s1, -1
2645
+ ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0
2646
+ ; GFX1032-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v1
2650
2647
; GFX1032-NEXT: s_brev_b32 s1, 1
2651
- ; GFX1032-NEXT: s_and_b32 vcc_lo, s0, s2
2652
- ; GFX1032-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
2653
- ; GFX1032-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
2654
2648
; GFX1032-NEXT: s_lshr_b32 s0, vcc_lo, 1
2655
- ; GFX1032-NEXT: v_cmp_nlg_f32_e32 vcc_lo, 0, v0
2649
+ ; GFX1032-NEXT: v_cmp_nlg_f32_e32 vcc_lo, 0, v1
2656
2650
; GFX1032-NEXT: s_ff1_i32_b64 s0, s[0:1]
2657
2651
; GFX1032-NEXT: s_cmp_gt_u32 s0, 9
2658
2652
; GFX1032-NEXT: s_cselect_b32 s0, -1, 0
@@ -2665,34 +2659,35 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
2665
2659
;
2666
2660
; GFX1064-LABEL: fcmp64:
2667
2661
; GFX1064: ; %bb.0: ; %entry
2668
- ; GFX1064-NEXT: s_load_dword s6 , s[4:5], 0x28
2662
+ ; GFX1064-NEXT: s_load_dword s2 , s[4:5], 0x28
2669
2663
; GFX1064-NEXT: v_cvt_f32_u32_e32 v0, v0
2670
2664
; GFX1064-NEXT: ; implicit-def: $vgpr1
2671
2665
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
2672
- ; GFX1064-NEXT: v_cmp_ngt_f32_e64 s[0:1], v0, |s6 |
2673
- ; GFX1064-NEXT: s_and_saveexec_b64 s[2:3 ], s[0:1]
2674
- ; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[2:3 ]
2666
+ ; GFX1064-NEXT: v_cmp_ngt_f32_e64 s[0:1], v0, |s2 |
2667
+ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5 ], s[0:1]
2668
+ ; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[4:5 ]
2675
2669
; GFX1064-NEXT: ; %bb.1: ; %frem.else
2676
2670
; GFX1064-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, v0
2677
- ; GFX1064-NEXT: v_cmp_eq_f32_e64 vcc, v0, |s6 |
2671
+ ; GFX1064-NEXT: v_cmp_eq_f32_e64 vcc, v0, |s2 |
2678
2672
; GFX1064-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
2673
+ ; GFX1064-NEXT: ; implicit-def: $vgpr0
2679
2674
; GFX1064-NEXT: ; %bb.2: ; %Flow13
2680
2675
; GFX1064-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
2681
2676
; GFX1064-NEXT: s_cbranch_execz .LBB51_8
2682
2677
; GFX1064-NEXT: ; %bb.3: ; %frem.compute
2683
- ; GFX1064-NEXT: v_frexp_mant_f32_e64 v1, |s6 |
2678
+ ; GFX1064-NEXT: v_frexp_mant_f32_e64 v1, |s2 |
2684
2679
; GFX1064-NEXT: v_frexp_exp_i32_f32_e32 v7, v0
2685
2680
; GFX1064-NEXT: v_frexp_mant_f32_e32 v8, v0
2686
2681
; GFX1064-NEXT: v_ldexp_f32 v1, v1, 1
2687
- ; GFX1064-NEXT: v_div_scale_f32 v2, s[2:3 ], v1, v1, 1.0
2682
+ ; GFX1064-NEXT: v_div_scale_f32 v2, s[4:5 ], v1, v1, 1.0
2688
2683
; GFX1064-NEXT: v_div_scale_f32 v5, vcc, 1.0, v1, 1.0
2689
2684
; GFX1064-NEXT: v_rcp_f32_e32 v3, v2
2690
2685
; GFX1064-NEXT: v_fma_f32 v4, -v2, v3, 1.0
2691
2686
; GFX1064-NEXT: v_fmac_f32_e32 v3, v4, v3
2692
2687
; GFX1064-NEXT: v_mul_f32_e32 v4, v5, v3
2693
2688
; GFX1064-NEXT: v_fma_f32 v6, -v2, v4, v5
2694
2689
; GFX1064-NEXT: v_fmac_f32_e32 v4, v6, v3
2695
- ; GFX1064-NEXT: v_frexp_exp_i32_f32_e32 v6, s6
2690
+ ; GFX1064-NEXT: v_frexp_exp_i32_f32_e32 v6, s2
2696
2691
; GFX1064-NEXT: v_fma_f32 v5, -v2, v4, v5
2697
2692
; GFX1064-NEXT: v_add_nc_u32_e32 v2, -1, v6
2698
2693
; GFX1064-NEXT: v_div_fmas_f32 v3, v5, v3, v4
@@ -2727,6 +2722,7 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
2727
2722
; GFX1064-NEXT: .LBB51_7: ; %Flow12
2728
2723
; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3]
2729
2724
; GFX1064-NEXT: v_add_nc_u32_e32 v4, -11, v4
2725
+ ; GFX1064-NEXT: v_and_b32_e32 v0, 0x80000000, v0
2730
2726
; GFX1064-NEXT: v_ldexp_f32 v4, v5, v4
2731
2727
; GFX1064-NEXT: v_mul_f32_e32 v3, v4, v3
2732
2728
; GFX1064-NEXT: v_rndne_f32_e32 v3, v3
@@ -2735,20 +2731,12 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
2735
2731
; GFX1064-NEXT: v_cmp_gt_f32_e32 vcc, 0, v3
2736
2732
; GFX1064-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
2737
2733
; GFX1064-NEXT: v_ldexp_f32 v1, v1, v2
2738
- ; GFX1064-NEXT: v_and_b32_e32 v2, 0x80000000, v0
2739
- ; GFX1064-NEXT: v_xor_b32_e32 v1, v2, v1
2734
+ ; GFX1064-NEXT: v_xor_b32_e32 v1, v0, v1
2740
2735
; GFX1064-NEXT: .LBB51_8: ; %Flow14
2741
2736
; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1]
2742
- ; GFX1064-NEXT: v_cmp_class_f32_e64 s[4:5], s6, 0x60
2743
- ; GFX1064-NEXT: v_cmp_class_f32_e64 s[0:1], s6, 3
2744
- ; GFX1064-NEXT: v_cmp_class_f32_e64 s[2:3], v0, 0x1f8
2745
- ; GFX1064-NEXT: v_cndmask_b32_e64 v0, v1, 0x7fc00000, s[4:5]
2746
- ; GFX1064-NEXT: s_xor_b64 s[0:1], s[0:1], -1
2747
- ; GFX1064-NEXT: s_and_b64 vcc, s[0:1], s[2:3]
2748
- ; GFX1064-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc
2749
- ; GFX1064-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
2737
+ ; GFX1064-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
2750
2738
; GFX1064-NEXT: s_lshr_b64 s[0:1], vcc, 1
2751
- ; GFX1064-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v0
2739
+ ; GFX1064-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v1
2752
2740
; GFX1064-NEXT: s_bitset1_b32 s1, 31
2753
2741
; GFX1064-NEXT: s_ff1_i32_b64 s0, s[0:1]
2754
2742
; GFX1064-NEXT: s_cmp_gt_u32 s0, 9
@@ -2884,22 +2872,23 @@ if.end2: ; preds = %if.end
2884
2872
define amdgpu_kernel void @fcmp32 (float %n , float %s ) {
2885
2873
; GFX1032-LABEL: fcmp32:
2886
2874
; GFX1032: ; %bb.0: ; %entry
2887
- ; GFX1032-NEXT: s_load_dword s0 , s[4:5], 0x28
2875
+ ; GFX1032-NEXT: s_load_dword s1 , s[4:5], 0x28
2888
2876
; GFX1032-NEXT: v_cvt_f32_u32_e32 v0, v0
2889
2877
; GFX1032-NEXT: ; implicit-def: $vgpr1
2890
2878
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
2891
- ; GFX1032-NEXT: v_cmp_ngt_f32_e64 s1 , v0, |s0 |
2892
- ; GFX1032-NEXT: s_and_saveexec_b32 s2, s1
2893
- ; GFX1032-NEXT: s_xor_b32 s1 , exec_lo, s2
2879
+ ; GFX1032-NEXT: v_cmp_ngt_f32_e64 s0 , v0, |s1 |
2880
+ ; GFX1032-NEXT: s_and_saveexec_b32 s2, s0
2881
+ ; GFX1032-NEXT: s_xor_b32 s0 , exec_lo, s2
2894
2882
; GFX1032-NEXT: ; %bb.1: ; %frem.else
2895
2883
; GFX1032-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, v0
2896
- ; GFX1032-NEXT: v_cmp_eq_f32_e64 vcc_lo, v0, |s0 |
2884
+ ; GFX1032-NEXT: v_cmp_eq_f32_e64 vcc_lo, v0, |s1 |
2897
2885
; GFX1032-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo
2886
+ ; GFX1032-NEXT: ; implicit-def: $vgpr0
2898
2887
; GFX1032-NEXT: ; %bb.2: ; %Flow13
2899
- ; GFX1032-NEXT: s_andn2_saveexec_b32 s1, s1
2888
+ ; GFX1032-NEXT: s_andn2_saveexec_b32 s0, s0
2900
2889
; GFX1032-NEXT: s_cbranch_execz .LBB53_8
2901
2890
; GFX1032-NEXT: ; %bb.3: ; %frem.compute
2902
- ; GFX1032-NEXT: v_frexp_mant_f32_e64 v1, |s0 |
2891
+ ; GFX1032-NEXT: v_frexp_mant_f32_e64 v1, |s1 |
2903
2892
; GFX1032-NEXT: v_frexp_exp_i32_f32_e32 v7, v0
2904
2893
; GFX1032-NEXT: v_frexp_mant_f32_e32 v8, v0
2905
2894
; GFX1032-NEXT: v_ldexp_f32 v1, v1, 1
@@ -2911,19 +2900,19 @@ define amdgpu_kernel void @fcmp32(float %n, float %s) {
2911
2900
; GFX1032-NEXT: v_mul_f32_e32 v4, v5, v3
2912
2901
; GFX1032-NEXT: v_fma_f32 v6, -v2, v4, v5
2913
2902
; GFX1032-NEXT: v_fmac_f32_e32 v4, v6, v3
2914
- ; GFX1032-NEXT: v_frexp_exp_i32_f32_e32 v6, s0
2903
+ ; GFX1032-NEXT: v_frexp_exp_i32_f32_e32 v6, s1
2915
2904
; GFX1032-NEXT: v_fma_f32 v5, -v2, v4, v5
2916
2905
; GFX1032-NEXT: v_add_nc_u32_e32 v2, -1, v6
2917
2906
; GFX1032-NEXT: v_div_fmas_f32 v3, v5, v3, v4
2918
2907
; GFX1032-NEXT: v_xad_u32 v4, v2, -1, v7
2919
2908
; GFX1032-NEXT: v_ldexp_f32 v5, v8, 12
2920
2909
; GFX1032-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
2921
2910
; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, 12, v4
2922
- ; GFX1032-NEXT: s_and_saveexec_b32 s2 , vcc_lo
2911
+ ; GFX1032-NEXT: s_and_saveexec_b32 s1 , vcc_lo
2923
2912
; GFX1032-NEXT: s_cbranch_execz .LBB53_7
2924
2913
; GFX1032-NEXT: ; %bb.4: ; %frem.loop_body.preheader
2925
2914
; GFX1032-NEXT: v_sub_nc_u32_e32 v4, v7, v6
2926
- ; GFX1032-NEXT: s_mov_b32 s3 , 0
2915
+ ; GFX1032-NEXT: s_mov_b32 s2 , 0
2927
2916
; GFX1032-NEXT: v_add_nc_u32_e32 v4, 12, v4
2928
2917
; GFX1032-NEXT: .LBB53_5: ; %frem.loop_body
2929
2918
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2937,15 +2926,16 @@ define amdgpu_kernel void @fcmp32(float %n, float %s) {
2937
2926
; GFX1032-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2938
2927
; GFX1032-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4
2939
2928
; GFX1032-NEXT: v_ldexp_f32 v5, v5, 12
2940
- ; GFX1032-NEXT: s_or_b32 s3 , vcc_lo, s3
2941
- ; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s3
2929
+ ; GFX1032-NEXT: s_or_b32 s2 , vcc_lo, s2
2930
+ ; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
2942
2931
; GFX1032-NEXT: s_cbranch_execnz .LBB53_5
2943
2932
; GFX1032-NEXT: ; %bb.6: ; %Flow
2944
- ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s3
2933
+ ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2
2945
2934
; GFX1032-NEXT: v_mov_b32_e32 v5, v6
2946
2935
; GFX1032-NEXT: .LBB53_7: ; %Flow12
2947
- ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2
2936
+ ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s1
2948
2937
; GFX1032-NEXT: v_add_nc_u32_e32 v4, -11, v4
2938
+ ; GFX1032-NEXT: v_and_b32_e32 v0, 0x80000000, v0
2949
2939
; GFX1032-NEXT: v_ldexp_f32 v4, v5, v4
2950
2940
; GFX1032-NEXT: v_mul_f32_e32 v3, v4, v3
2951
2941
; GFX1032-NEXT: v_rndne_f32_e32 v3, v3
@@ -2954,20 +2944,12 @@ define amdgpu_kernel void @fcmp32(float %n, float %s) {
2954
2944
; GFX1032-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v3
2955
2945
; GFX1032-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
2956
2946
; GFX1032-NEXT: v_ldexp_f32 v1, v1, v2
2957
- ; GFX1032-NEXT: v_and_b32_e32 v2, 0x80000000, v0
2958
- ; GFX1032-NEXT: v_xor_b32_e32 v1, v2, v1
2947
+ ; GFX1032-NEXT: v_xor_b32_e32 v1, v0, v1
2959
2948
; GFX1032-NEXT: .LBB53_8: ; %Flow14
2960
- ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s1
2961
- ; GFX1032-NEXT: v_cmp_class_f32_e64 s1, s0, 3
2962
- ; GFX1032-NEXT: v_cmp_class_f32_e64 s0, s0, 0x60
2963
- ; GFX1032-NEXT: v_cmp_class_f32_e64 s2, v0, 0x1f8
2964
- ; GFX1032-NEXT: v_cndmask_b32_e64 v0, v1, 0x7fc00000, s0
2965
- ; GFX1032-NEXT: s_xor_b32 s0, s1, -1
2966
- ; GFX1032-NEXT: s_and_b32 vcc_lo, s0, s2
2967
- ; GFX1032-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
2968
- ; GFX1032-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
2949
+ ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0
2950
+ ; GFX1032-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v1
2969
2951
; GFX1032-NEXT: s_lshr_b32 s0, vcc_lo, 1
2970
- ; GFX1032-NEXT: v_cmp_nlg_f32_e32 vcc_lo, 0, v0
2952
+ ; GFX1032-NEXT: v_cmp_nlg_f32_e32 vcc_lo, 0, v1
2971
2953
; GFX1032-NEXT: s_bitset1_b32 s0, 31
2972
2954
; GFX1032-NEXT: s_ff1_i32_b32 s0, s0
2973
2955
; GFX1032-NEXT: s_cmp_gt_u32 s0, 9
@@ -2981,34 +2963,35 @@ define amdgpu_kernel void @fcmp32(float %n, float %s) {
2981
2963
;
2982
2964
; GFX1064-LABEL: fcmp32:
2983
2965
; GFX1064: ; %bb.0: ; %entry
2984
- ; GFX1064-NEXT: s_load_dword s6 , s[4:5], 0x28
2966
+ ; GFX1064-NEXT: s_load_dword s2 , s[4:5], 0x28
2985
2967
; GFX1064-NEXT: v_cvt_f32_u32_e32 v0, v0
2986
2968
; GFX1064-NEXT: ; implicit-def: $vgpr1
2987
2969
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
2988
- ; GFX1064-NEXT: v_cmp_ngt_f32_e64 s[0:1], v0, |s6 |
2989
- ; GFX1064-NEXT: s_and_saveexec_b64 s[2:3 ], s[0:1]
2990
- ; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[2:3 ]
2970
+ ; GFX1064-NEXT: v_cmp_ngt_f32_e64 s[0:1], v0, |s2 |
2971
+ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5 ], s[0:1]
2972
+ ; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[4:5 ]
2991
2973
; GFX1064-NEXT: ; %bb.1: ; %frem.else
2992
2974
; GFX1064-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, v0
2993
- ; GFX1064-NEXT: v_cmp_eq_f32_e64 vcc, v0, |s6 |
2975
+ ; GFX1064-NEXT: v_cmp_eq_f32_e64 vcc, v0, |s2 |
2994
2976
; GFX1064-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
2977
+ ; GFX1064-NEXT: ; implicit-def: $vgpr0
2995
2978
; GFX1064-NEXT: ; %bb.2: ; %Flow13
2996
2979
; GFX1064-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
2997
2980
; GFX1064-NEXT: s_cbranch_execz .LBB53_8
2998
2981
; GFX1064-NEXT: ; %bb.3: ; %frem.compute
2999
- ; GFX1064-NEXT: v_frexp_mant_f32_e64 v1, |s6 |
2982
+ ; GFX1064-NEXT: v_frexp_mant_f32_e64 v1, |s2 |
3000
2983
; GFX1064-NEXT: v_frexp_exp_i32_f32_e32 v7, v0
3001
2984
; GFX1064-NEXT: v_frexp_mant_f32_e32 v8, v0
3002
2985
; GFX1064-NEXT: v_ldexp_f32 v1, v1, 1
3003
- ; GFX1064-NEXT: v_div_scale_f32 v2, s[2:3 ], v1, v1, 1.0
2986
+ ; GFX1064-NEXT: v_div_scale_f32 v2, s[4:5 ], v1, v1, 1.0
3004
2987
; GFX1064-NEXT: v_div_scale_f32 v5, vcc, 1.0, v1, 1.0
3005
2988
; GFX1064-NEXT: v_rcp_f32_e32 v3, v2
3006
2989
; GFX1064-NEXT: v_fma_f32 v4, -v2, v3, 1.0
3007
2990
; GFX1064-NEXT: v_fmac_f32_e32 v3, v4, v3
3008
2991
; GFX1064-NEXT: v_mul_f32_e32 v4, v5, v3
3009
2992
; GFX1064-NEXT: v_fma_f32 v6, -v2, v4, v5
3010
2993
; GFX1064-NEXT: v_fmac_f32_e32 v4, v6, v3
3011
- ; GFX1064-NEXT: v_frexp_exp_i32_f32_e32 v6, s6
2994
+ ; GFX1064-NEXT: v_frexp_exp_i32_f32_e32 v6, s2
3012
2995
; GFX1064-NEXT: v_fma_f32 v5, -v2, v4, v5
3013
2996
; GFX1064-NEXT: v_add_nc_u32_e32 v2, -1, v6
3014
2997
; GFX1064-NEXT: v_div_fmas_f32 v3, v5, v3, v4
@@ -3043,6 +3026,7 @@ define amdgpu_kernel void @fcmp32(float %n, float %s) {
3043
3026
; GFX1064-NEXT: .LBB53_7: ; %Flow12
3044
3027
; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3]
3045
3028
; GFX1064-NEXT: v_add_nc_u32_e32 v4, -11, v4
3029
+ ; GFX1064-NEXT: v_and_b32_e32 v0, 0x80000000, v0
3046
3030
; GFX1064-NEXT: v_ldexp_f32 v4, v5, v4
3047
3031
; GFX1064-NEXT: v_mul_f32_e32 v3, v4, v3
3048
3032
; GFX1064-NEXT: v_rndne_f32_e32 v3, v3
@@ -3051,20 +3035,12 @@ define amdgpu_kernel void @fcmp32(float %n, float %s) {
3051
3035
; GFX1064-NEXT: v_cmp_gt_f32_e32 vcc, 0, v3
3052
3036
; GFX1064-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
3053
3037
; GFX1064-NEXT: v_ldexp_f32 v1, v1, v2
3054
- ; GFX1064-NEXT: v_and_b32_e32 v2, 0x80000000, v0
3055
- ; GFX1064-NEXT: v_xor_b32_e32 v1, v2, v1
3038
+ ; GFX1064-NEXT: v_xor_b32_e32 v1, v0, v1
3056
3039
; GFX1064-NEXT: .LBB53_8: ; %Flow14
3057
3040
; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1]
3058
- ; GFX1064-NEXT: v_cmp_class_f32_e64 s[4:5], s6, 0x60
3059
- ; GFX1064-NEXT: v_cmp_class_f32_e64 s[0:1], s6, 3
3060
- ; GFX1064-NEXT: v_cmp_class_f32_e64 s[2:3], v0, 0x1f8
3061
- ; GFX1064-NEXT: v_cndmask_b32_e64 v0, v1, 0x7fc00000, s[4:5]
3062
- ; GFX1064-NEXT: s_xor_b64 s[0:1], s[0:1], -1
3063
- ; GFX1064-NEXT: s_and_b64 vcc, s[0:1], s[2:3]
3064
- ; GFX1064-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc
3065
- ; GFX1064-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
3041
+ ; GFX1064-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
3066
3042
; GFX1064-NEXT: s_lshr_b32 s0, vcc_lo, 1
3067
- ; GFX1064-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v0
3043
+ ; GFX1064-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v1
3068
3044
; GFX1064-NEXT: s_bitset1_b32 s0, 31
3069
3045
; GFX1064-NEXT: s_ff1_i32_b32 s0, s0
3070
3046
; GFX1064-NEXT: s_cmp_gt_u32 s0, 9
0 commit comments