[AMDGPU] adjust tests to prevent fpclass bitcast folding (#106268)

AlexMaclean · web-flow · commit 4c4908cd5d4b · 2024-08-27T13:20:44.000-07:00
Make some minor tweaks to AMDGPU tests to ensure they still work as intended after #97762. Without these tweaks, bitcast-aware fpclass deduction would allow these tests to be radically simplified (folded), so they would no longer exercise the intended code paths.
diff --git a/llvm/test/CodeGen/AMDGPU/anyext.ll b/llvm/test/CodeGen/AMDGPU/anyext.ll
@@ -152,7 +152,7 @@ define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 {
 ; GCN-NEXT:    s_mov_b32 s2, -1
 ; GCN-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_and_b32_e32 v0, 0x8000, v0
+; GCN-NEXT:    v_and_b32_e32 v0, 0x8001, v0
 ; GCN-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GCN-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
@@ -164,7 +164,7 @@ define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 {
 ; GFX8-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8-NEXT:    s_mov_b32 s2, -1
 ; GFX8-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x8000
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0x8001
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_and_b32_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
@@ -179,7 +179,7 @@ define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 {
 ; GFX9-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9-NEXT:    s_mov_b32 s2, -1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_and_b32_e32 v0, 0x80008000, v0
+; GFX9-NEXT:    v_and_b32_e32 v0, 0x80018001, v0
 ; GFX9-NEXT:    v_bfi_b32 v0, v1, 0, v0
 ; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
@@ -188,7 +188,7 @@ define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 {
 bb:
   %tmp = load i16, ptr addrspace(1) undef, align 2
   %tmp2 = insertelement <2 x i16> undef, i16 %tmp, i32 1
-  %tmp4 = and <2 x i16> %tmp2, <i16 -32768, i16 -32768>
+  %tmp4 = and <2 x i16> %tmp2, <i16 -32767, i16 -32767>
   %tmp5 = zext <2 x i16> %tmp4 to <2 x i32>
   %tmp6 = shl nuw <2 x i32> %tmp5, <i32 16, i32 16>
   %tmp7 = or <2 x i32> zeroinitializer, %tmp6
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
@@ -1553,10 +1553,10 @@ define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) {
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_cmp_nlt_f32_e64 s[0:1], s0, 0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
+; GCN-NEXT:    v_cmp_nge_f32_e32 vcc, 0, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
 ; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
-; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
+; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v0
 ; GCN-NEXT:    s_and_b64 vcc, exec, vcc
 ; GCN-NEXT:    s_endpgm
 ;
@@ -1567,23 +1567,23 @@ define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) {
 ; GFX11-NEXT:    v_cmp_nlt_f32_e64 s0, s0, 0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
+; GFX11-NEXT:    v_cmp_nge_f32_e32 vcc_lo, 0, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_mul_f32_e64 v0, -v0, v1
-; GFX11-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 0, v0
+; GFX11-NEXT:    v_cmp_le_f32_e32 vcc_lo, 0, v0
 ; GFX11-NEXT:    s_and_b32 vcc_lo, exec_lo, vcc_lo
 ; GFX11-NEXT:    s_endpgm
 .entry:
   %i = fcmp uge float %.i2369, 0.000000e+00
   %.i2379 = select i1 %i, i32 1, i32 0
   %.i0436 = bitcast i32 %.i2379 to float
   %.i0440 = fneg float %.i0436
-  %i1 = fcmp uge float %.i0436, 0.000000e+00
+  %i1 = fcmp ugt float %.i0436, 0.000000e+00
   %.i2495 = select i1 %i1, i32 %.i2379, i32 0
   %.i0552 = bitcast i32 %.i2495 to float
   %.i0592 = fmul float %.i0440, %.i0552
-  %.i0721 = fcmp ogt float %.i0592, 0.000000e+00
+  %.i0721 = fcmp oge float %.i0592, 0.000000e+00
   br i1 %.i0721, label %bb5, label %bb
 
 bb:                                               ; preds = %.entry