Skip to content

AMDGPU/GlobalISel: Restore disabled test #129001

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into main from users/arsenm/amdgpu/globalisel-restore-div-fmas-test
Feb 28, 2025

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Feb 27, 2025

No description provided.

Copy link
Contributor Author

arsenm commented Feb 27, 2025

This stack of pull requests is managed by Graphite. Learn more about stacking.

@arsenm arsenm marked this pull request as ready for review February 27, 2025 05:09
@llvmbot
Copy link
Member

llvmbot commented Feb 27, 2025

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Patch is 74.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129001.diff

1 File Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll (+421-466)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
index 53f6c9543c3e3..074272f7bed86 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
@@ -5,7 +5,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX10_W64 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefix=GFX11_W32 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX11_W64 %s
-; REQUIRES: do-not-run-me
 
 define float @v_div_fmas_f32(float %a, float %b, float %c, i1 %d) {
 ; GFX7-LABEL: v_div_fmas_f32:
@@ -291,14 +290,14 @@ define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double
 define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
 ; GFX7-LABEL: test_div_fmas_f32:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[2:3], 0xa
-; GFX7-NEXT:    s_load_dword s5, s[2:3], 0x13
-; GFX7-NEXT:    s_load_dword s6, s[2:3], 0x1c
-; GFX7-NEXT:    s_load_dword s7, s[2:3], 0x25
-; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX7-NEXT:    s_load_dword s2, s[4:5], 0xa
+; GFX7-NEXT:    s_load_dword s3, s[4:5], 0x13
+; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x1c
+; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x25
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, s4
-; GFX7-NEXT:    v_mov_b32_e32 v1, s5
+; GFX7-NEXT:    v_mov_b32_e32 v0, s2
+; GFX7-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX7-NEXT:    v_mov_b32_e32 v2, s6
 ; GFX7-NEXT:    s_and_b32 s2, 1, s7
 ; GFX7-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
@@ -311,17 +310,17 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f
 ;
 ; GFX8-LABEL: test_div_fmas_f32:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x28
-; GFX8-NEXT:    s_load_dword s1, s[2:3], 0x4c
-; GFX8-NEXT:    s_load_dword s4, s[2:3], 0x70
-; GFX8-NEXT:    s_load_dword s5, s[2:3], 0x94
+; GFX8-NEXT:    s_load_dword s0, s[4:5], 0x28
+; GFX8-NEXT:    s_load_dword s1, s[4:5], 0x4c
+; GFX8-NEXT:    s_load_dword s2, s[4:5], 0x70
+; GFX8-NEXT:    s_load_dword s3, s[4:5], 0x94
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8-NEXT:    v_mov_b32_e32 v2, s4
-; GFX8-NEXT:    s_and_b32 s0, 1, s5
+; GFX8-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-NEXT:    s_and_b32 s0, 1, s3
 ; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX8-NEXT:    s_nop 2
 ; GFX8-NEXT:    v_div_fmas_f32 v2, v0, v1, v2
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
@@ -333,14 +332,14 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f
 ; GFX10_W32-LABEL: test_div_fmas_f32:
 ; GFX10_W32:       ; %bb.0:
 ; GFX10_W32-NEXT:    s_clause 0x4
-; GFX10_W32-NEXT:    s_load_dword s4, s[2:3], 0x94
-; GFX10_W32-NEXT:    s_load_dword s5, s[2:3], 0x4c
-; GFX10_W32-NEXT:    s_load_dword s6, s[2:3], 0x70
-; GFX10_W32-NEXT:    s_load_dword s7, s[2:3], 0x28
-; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX10_W32-NEXT:    s_load_dword s2, s[4:5], 0x94
+; GFX10_W32-NEXT:    s_load_dword s3, s[4:5], 0x4c
+; GFX10_W32-NEXT:    s_load_dword s6, s[4:5], 0x70
+; GFX10_W32-NEXT:    s_load_dword s7, s[4:5], 0x28
+; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10_W32-NEXT:    s_and_b32 s2, 1, s4
-; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s5
+; GFX10_W32-NEXT:    s_and_b32 s2, 1, s2
+; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
 ; GFX10_W32-NEXT:    v_mov_b32_e32 v1, s6
 ; GFX10_W32-NEXT:    v_div_fmas_f32 v0, s7, v0, v1
@@ -351,14 +350,14 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f
 ; GFX10_W64-LABEL: test_div_fmas_f32:
 ; GFX10_W64:       ; %bb.0:
 ; GFX10_W64-NEXT:    s_clause 0x4
-; GFX10_W64-NEXT:    s_load_dword s4, s[2:3], 0x94
-; GFX10_W64-NEXT:    s_load_dword s5, s[2:3], 0x4c
-; GFX10_W64-NEXT:    s_load_dword s6, s[2:3], 0x70
-; GFX10_W64-NEXT:    s_load_dword s7, s[2:3], 0x28
-; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX10_W64-NEXT:    s_load_dword s2, s[4:5], 0x94
+; GFX10_W64-NEXT:    s_load_dword s3, s[4:5], 0x4c
+; GFX10_W64-NEXT:    s_load_dword s6, s[4:5], 0x70
+; GFX10_W64-NEXT:    s_load_dword s7, s[4:5], 0x28
+; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10_W64-NEXT:    s_and_b32 s2, 1, s4
-; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s5
+; GFX10_W64-NEXT:    s_and_b32 s2, 1, s2
+; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
 ; GFX10_W64-NEXT:    v_mov_b32_e32 v1, s6
 ; GFX10_W64-NEXT:    v_div_fmas_f32 v0, s7, v0, v1
@@ -369,40 +368,36 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f
 ; GFX11_W32-LABEL: test_div_fmas_f32:
 ; GFX11_W32:       ; %bb.0:
 ; GFX11_W32-NEXT:    s_clause 0x4
-; GFX11_W32-NEXT:    s_load_b32 s4, s[2:3], 0x94
-; GFX11_W32-NEXT:    s_load_b32 s5, s[2:3], 0x4c
-; GFX11_W32-NEXT:    s_load_b32 s6, s[2:3], 0x70
-; GFX11_W32-NEXT:    s_load_b32 s7, s[2:3], 0x28
-; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11_W32-NEXT:    s_load_b32 s2, s[4:5], 0x94
+; GFX11_W32-NEXT:    s_load_b32 s3, s[4:5], 0x4c
+; GFX11_W32-NEXT:    s_load_b32 s6, s[4:5], 0x70
+; GFX11_W32-NEXT:    s_load_b32 s7, s[4:5], 0x28
+; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
 ; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11_W32-NEXT:    s_and_b32 s2, 1, s4
-; GFX11_W32-NEXT:    v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6
+; GFX11_W32-NEXT:    s_and_b32 s2, 1, s2
+; GFX11_W32-NEXT:    v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s6
 ; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
 ; GFX11_W32-NEXT:    v_div_fmas_f32 v0, s7, v0, v1
 ; GFX11_W32-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11_W32-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX11_W32-NEXT:    s_nop 0
-; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11_W32-NEXT:    s_endpgm
 ;
 ; GFX11_W64-LABEL: test_div_fmas_f32:
 ; GFX11_W64:       ; %bb.0:
 ; GFX11_W64-NEXT:    s_clause 0x4
-; GFX11_W64-NEXT:    s_load_b32 s4, s[2:3], 0x94
-; GFX11_W64-NEXT:    s_load_b32 s5, s[2:3], 0x4c
-; GFX11_W64-NEXT:    s_load_b32 s6, s[2:3], 0x70
-; GFX11_W64-NEXT:    s_load_b32 s7, s[2:3], 0x28
-; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11_W64-NEXT:    s_load_b32 s2, s[4:5], 0x94
+; GFX11_W64-NEXT:    s_load_b32 s3, s[4:5], 0x4c
+; GFX11_W64-NEXT:    s_load_b32 s6, s[4:5], 0x70
+; GFX11_W64-NEXT:    s_load_b32 s7, s[4:5], 0x28
+; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
 ; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11_W64-NEXT:    s_and_b32 s2, 1, s4
-; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s5
+; GFX11_W64-NEXT:    s_and_b32 s2, 1, s2
+; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX11_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
 ; GFX11_W64-NEXT:    v_mov_b32_e32 v1, s6
 ; GFX11_W64-NEXT:    v_div_fmas_f32 v0, s7, v0, v1
 ; GFX11_W64-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11_W64-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX11_W64-NEXT:    s_nop 0
-; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11_W64-NEXT:    s_endpgm
   %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d)
   store float %result, ptr addrspace(1) %out, align 4
@@ -412,33 +407,33 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f
 define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
 ; GFX7-LABEL: test_div_fmas_f32_inline_imm_0:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[2:3], 0x13
-; GFX7-NEXT:    s_load_dword s5, s[2:3], 0x1c
-; GFX7-NEXT:    s_load_dword s6, s[2:3], 0x25
-; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
-; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_load_dword s2, s[4:5], 0x13
+; GFX7-NEXT:    s_load_dword s3, s[4:5], 0x1c
+; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x25
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, s4
-; GFX7-NEXT:    v_mov_b32_e32 v1, s5
+; GFX7-NEXT:    v_mov_b32_e32 v0, s2
+; GFX7-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX7-NEXT:    s_and_b32 s2, 1, s6
 ; GFX7-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
 ; GFX7-NEXT:    s_mov_b32 s2, -1
-; GFX7-NEXT:    s_nop 2
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_nop 1
 ; GFX7-NEXT:    v_div_fmas_f32 v0, 1.0, v0, v1
 ; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: test_div_fmas_f32_inline_imm_0:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x4c
-; GFX8-NEXT:    s_load_dword s1, s[2:3], 0x70
-; GFX8-NEXT:    s_load_dword s4, s[2:3], 0x94
+; GFX8-NEXT:    s_load_dword s0, s[4:5], 0x4c
+; GFX8-NEXT:    s_load_dword s1, s[4:5], 0x70
+; GFX8-NEXT:    s_load_dword s2, s[4:5], 0x94
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8-NEXT:    s_and_b32 s0, 1, s4
+; GFX8-NEXT:    s_and_b32 s0, 1, s2
 ; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX8-NEXT:    s_nop 2
 ; GFX8-NEXT:    v_div_fmas_f32 v2, 1.0, v0, v1
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
@@ -450,14 +445,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out,
 ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_0:
 ; GFX10_W32:       ; %bb.0:
 ; GFX10_W32-NEXT:    s_clause 0x3
-; GFX10_W32-NEXT:    s_load_dword s4, s[2:3], 0x94
-; GFX10_W32-NEXT:    s_load_dword s5, s[2:3], 0x70
-; GFX10_W32-NEXT:    s_load_dword s6, s[2:3], 0x4c
-; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX10_W32-NEXT:    s_load_dword s2, s[4:5], 0x94
+; GFX10_W32-NEXT:    s_load_dword s3, s[4:5], 0x70
+; GFX10_W32-NEXT:    s_load_dword s6, s[4:5], 0x4c
+; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10_W32-NEXT:    s_and_b32 s2, 1, s4
-; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s5
+; GFX10_W32-NEXT:    s_and_b32 s2, 1, s2
+; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
 ; GFX10_W32-NEXT:    v_div_fmas_f32 v0, 1.0, s6, v0
 ; GFX10_W32-NEXT:    global_store_dword v1, v0, s[0:1]
@@ -466,14 +461,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out,
 ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_0:
 ; GFX10_W64:       ; %bb.0:
 ; GFX10_W64-NEXT:    s_clause 0x3
-; GFX10_W64-NEXT:    s_load_dword s4, s[2:3], 0x94
-; GFX10_W64-NEXT:    s_load_dword s5, s[2:3], 0x70
-; GFX10_W64-NEXT:    s_load_dword s6, s[2:3], 0x4c
-; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX10_W64-NEXT:    s_load_dword s2, s[4:5], 0x94
+; GFX10_W64-NEXT:    s_load_dword s3, s[4:5], 0x70
+; GFX10_W64-NEXT:    s_load_dword s6, s[4:5], 0x4c
+; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10_W64-NEXT:    s_and_b32 s2, 1, s4
-; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s5
+; GFX10_W64-NEXT:    s_and_b32 s2, 1, s2
+; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
 ; GFX10_W64-NEXT:    v_div_fmas_f32 v0, 1.0, s6, v0
 ; GFX10_W64-NEXT:    global_store_dword v1, v0, s[0:1]
@@ -482,37 +477,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out,
 ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_0:
 ; GFX11_W32:       ; %bb.0:
 ; GFX11_W32-NEXT:    s_clause 0x3
-; GFX11_W32-NEXT:    s_load_b32 s4, s[2:3], 0x94
-; GFX11_W32-NEXT:    s_load_b32 s5, s[2:3], 0x70
-; GFX11_W32-NEXT:    s_load_b32 s6, s[2:3], 0x4c
-; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11_W32-NEXT:    s_load_b32 s2, s[4:5], 0x94
+; GFX11_W32-NEXT:    s_load_b32 s3, s[4:5], 0x70
+; GFX11_W32-NEXT:    s_load_b32 s6, s[4:5], 0x4c
+; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
 ; GFX11_W32-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11_W32-NEXT:    s_and_b32 s2, 1, s4
-; GFX11_W32-NEXT:    v_mov_b32_e32 v0, s5
+; GFX11_W32-NEXT:    s_and_b32 s2, 1, s2
+; GFX11_W32-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
 ; GFX11_W32-NEXT:    v_div_fmas_f32 v0, 1.0, s6, v0
 ; GFX11_W32-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX11_W32-NEXT:    s_nop 0
-; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11_W32-NEXT:    s_endpgm
 ;
 ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_0:
 ; GFX11_W64:       ; %bb.0:
 ; GFX11_W64-NEXT:    s_clause 0x3
-; GFX11_W64-NEXT:    s_load_b32 s4, s[2:3], 0x94
-; GFX11_W64-NEXT:    s_load_b32 s5, s[2:3], 0x70
-; GFX11_W64-NEXT:    s_load_b32 s6, s[2:3], 0x4c
-; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11_W64-NEXT:    s_load_b32 s2, s[4:5], 0x94
+; GFX11_W64-NEXT:    s_load_b32 s3, s[4:5], 0x70
+; GFX11_W64-NEXT:    s_load_b32 s6, s[4:5], 0x4c
+; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
 ; GFX11_W64-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11_W64-NEXT:    s_and_b32 s2, 1, s4
-; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s5
+; GFX11_W64-NEXT:    s_and_b32 s2, 1, s2
+; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX11_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
 ; GFX11_W64-NEXT:    v_div_fmas_f32 v0, 1.0, s6, v0
 ; GFX11_W64-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX11_W64-NEXT:    s_nop 0
-; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11_W64-NEXT:    s_endpgm
   %result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d)
   store float %result, ptr addrspace(1) %out, align 4
@@ -522,33 +513,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out,
 define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, float %a, float %b, float %c, [8 x i32], i1 %d) {
 ; GFX7-LABEL: test_div_fmas_f32_inline_imm_1:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[2:3], 0x2
-; GFX7-NEXT:    s_load_dword s5, s[2:3], 0x4
-; GFX7-NEXT:    s_load_dword s6, s[2:3], 0xd
-; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
-; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_load_dword s2, s[4:5], 0x2
+; GFX7-NEXT:    s_load_dword s3, s[4:5], 0x4
+; GFX7-NEXT:    s_load_dword s6, s[4:5], 0xd
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, s4
-; GFX7-NEXT:    v_mov_b32_e32 v1, s5
+; GFX7-NEXT:    v_mov_b32_e32 v0, s2
+; GFX7-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX7-NEXT:    s_and_b32 s2, 1, s6
 ; GFX7-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
 ; GFX7-NEXT:    s_mov_b32 s2, -1
-; GFX7-NEXT:    s_nop 2
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_nop 1
 ; GFX7-NEXT:    v_div_fmas_f32 v0, v0, 1.0, v1
 ; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: test_div_fmas_f32_inline_imm_1:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x8
-; GFX8-NEXT:    s_load_dword s1, s[2:3], 0x10
-; GFX8-NEXT:    s_load_dword s4, s[2:3], 0x34
+; GFX8-NEXT:    s_load_dword s0, s[4:5], 0x8
+; GFX8-NEXT:    s_load_dword s1, s[4:5], 0x10
+; GFX8-NEXT:    s_load_dword s2, s[4:5], 0x34
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8-NEXT:    s_and_b32 s0, 1, s4
+; GFX8-NEXT:    s_and_b32 s0, 1, s2
 ; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX8-NEXT:    s_nop 2
 ; GFX8-NEXT:    v_div_fmas_f32 v2, v0, 1.0, v1
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
@@ -560,14 +551,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out,
 ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_1:
 ; GFX10_W32:       ; %bb.0:
 ; GFX10_W32-NEXT:    s_clause 0x3
-; GFX10_W32-NEXT:    s_load_dword s4, s[2:3], 0x34
-; GFX10_W32-NEXT:    s_load_dword s5, s[2:3], 0x10
-; GFX10_W32-NEXT:    s_load_dword s6, s[2:3], 0x8
-; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX10_W32-NEXT:    s_load_dword s2, s[4:5], 0x34
+; GFX10_W32-NEXT:    s_load_dword s3, s[4:5], 0x10
+; GFX10_W32-NEXT:    s_load_dword s6, s[4:5], 0x8
+; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10_W32-NEXT:    s_and_b32 s2, 1, s4
-; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s5
+; GFX10_W32-NEXT:    s_and_b32 s2, 1, s2
+; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
 ; GFX10_W32-NEXT:    v_div_fmas_f32 v0, s6, 1.0, v0
 ; GFX10_W32-NEXT:    global_store_dword v1, v0, s[0:1]
@@ -576,14 +567,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out,
 ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_1:
 ; GFX10_W64:       ; %bb.0:
 ; GFX10_W64-NEXT:    s_clause 0x3
-; GFX10_W64-NEXT:    s_load_dword s4, s[2:3], 0x34
-; GFX10_W64-NEXT:    s_load_dword s5, s[2:3], 0x10
-; GFX10_W64-NEXT:    s_load_dword s6, s[2:3], 0x8
-; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX10_W64-NEXT:    s_load_dword s2, s[4:5], 0x34
+; GFX10_W64-NEXT:    s_load_dword s3, s[4:5], 0x10
+; GFX10_W64-NEXT:    s_load_dword s6, s[4:5], 0x8
+; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10_W64-NEXT:    s_and_b32 s2, 1, s4
-; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s5
+; GFX10_W64-NEXT:    s_and_b32 s2, 1, s2
+; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
 ; GFX10_W64-NEXT:    v_div_fmas_f32 v0, s6, 1.0, v0
 ; GFX10_W64-NEXT:    global_store_dword v1, v0, s[0:1]
@@ -592,37 +583,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out,
 ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_1:
 ; GFX11_W32:       ; %bb.0:
 ; GFX11_W32-NEXT:    s_clause 0x3
-; GFX11_W32-NEXT:    s_load_b32 s4, s[2:3], 0x34
-; GFX11_W32-NEXT:    s_load_b32 s5, s[2:3], 0x10
-; GFX11_W32-NEXT:    s_load_b32 s6, s[2:3], 0x8
-; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11_W32-NEXT:    s_load_b32 s2, s[4:5], 0x34
+; GFX11_W32-NEXT:    s_load_b32 s3, s[4:5], 0x10
+; GFX11_W32-NEXT:    s_load_b32 s6, s[4:5], 0x8
+; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
 ; GFX11_W32-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11_W32-NEXT:    s_and_b32 s2, 1, s4
-; GFX11_W32-NEXT:    v_mov_b32_e32 v0, s5
+; GFX11_W32-NEXT:    s_and_b32 s2, 1, s2
+; GFX11_W32-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
 ; GFX11_W32-NEXT:    v_div_fmas_f32 v0, s6, 1.0, v0
 ; GFX11_W32-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX11_W32-NEXT:    s_nop 0
-; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11_W32-NEXT:    s_endpgm
 ;
 ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_1:
 ; GFX11_W64:       ; %bb.0:
 ; GFX11_W64-NEXT:    s_clause 0x3
-; GFX11_W64-NEXT:    s_load_b32 s4, s[2:3], 0x34
-; GFX11_W64-NEXT:    s_load_b32 s5, s[2:3], 0x10
-; GFX11_W64-NEXT:    s_load_b32 s6, s[2:3], 0x8
-; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11_W64-NEXT:    s_load_b32 s2, s[4:5], 0x34
+; GFX11_W64-NEXT:    s_load_b32 s3, s[4:5], 0x10
+; GFX11_W64-NEXT:    s_load_b32 s6, s[4:5], 0x8
+; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
 ; GFX11_W64-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11_W64-NEXT:    s_and_b32 s2, 1, s4
-; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s5
+; GFX11_W64-NEXT:    ...
[truncated]

@arsenm arsenm merged commit 6a46cf4 into main Feb 28, 2025
16 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/globalisel-restore-div-fmas-test branch February 28, 2025 10:24
cheezeburglar pushed a commit to cheezeburglar/llvm-project that referenced this pull request Feb 28, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants