[AMDGPU][True16][CodeGen] update test for buildbot failure #131028

broxigarchen · 2025-03-12T20:56:34Z

This is a NFC patch

#103366 hit a buildbot failure with i1-to-bf16.ll. Update the test to fix the build.

Also remove duplicated comments added in #103366

llvmbot · 2025-03-12T20:57:16Z

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

Changes

This is a NFC patch

#103366 hit a buildbot failure with i1-to-bf16.ll. Update the test to fix the build.

Also remove duplicated comments added in #103366

Full diff: https://github.com/llvm/llvm-project/pull/131028.diff

2 Files Affected:

(modified) llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp (-3)
(modified) llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll (+12-4)

diff --git a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
index 1a00b5a846b2f..4deb2a9485e4d 100644
--- a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
@@ -25,9 +25,6 @@
 /// This pass also adds register allocation hints to COPY.
 /// The hints will be post-processed by SIRegisterInfo::getRegAllocationHints.
 /// When using True16, we often see COPY moving a 16-bit value between a VGPR_32
-/// This pass also adds register allocation hints to COPY.
-/// The hints will be post-processed by SIRegisterInfo::getRegAllocationHints.
-/// When using True16, we often see COPY moving a 16-bit value between a VGPR_32
 /// and a VGPR_16. If we use the VGPR_16 that corresponds to the lo16 bits of
 /// the VGPR_32, the COPY can be completely eliminated.
 ///
diff --git a/llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll b/llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll
index a46d629c02b85..59b0ba2469a20 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll
@@ -108,13 +108,14 @@ define amdgpu_ps i32 @s_uitofp_i1_to_bf16(i1 inreg %num) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s0
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_bfe_u32 s1, s0, 0x10010
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_add_i32 s1, s1, s0
 ; GFX11-NEXT:    s_bitset1_b32 s0, 22
 ; GFX11-NEXT:    s_addk_i32 s1, 0x7fff
 ; GFX11-NEXT:    s_and_b32 s2, vcc_lo, exec_lo
 ; GFX11-NEXT:    s_cselect_b32 s0, s0, s1
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_lshr_b32 s0, s0, 16
 ; GFX11-NEXT:    ; return to shader part epilog
 ;
@@ -125,6 +126,7 @@ define amdgpu_ps i32 @s_uitofp_i1_to_bf16(i1 inreg %num) {
 ; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s0
 ; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12-NEXT:    s_bfe_u32 s1, s0, 0x10010
 ; GFX12-NEXT:    s_or_b32 s2, s0, 0x400000
 ; GFX12-NEXT:    s_wait_alu 0xfffe
@@ -305,10 +307,11 @@ define amdgpu_ps <2 x i32> @s_uitofp_v2i1_to_v2bf16(<2 x i1> inreg %num) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1.0, s0
 ; GFX11-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX11-NEXT:    v_cmp_u_f32_e64 s1, v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v1
 ; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
 ; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x10010
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_add_i32 s3, s3, s0
 ; GFX11-NEXT:    s_bitset1_b32 s0, 22
 ; GFX11-NEXT:    s_addk_i32 s3, 0x7fff
@@ -338,6 +341,7 @@ define amdgpu_ps <2 x i32> @s_uitofp_v2i1_to_v2bf16(<2 x i1> inreg %num) {
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX12-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX12-NEXT:    v_readfirstlane_b32 s0, v1
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12-NEXT:    s_bfe_u32 s1, s0, 0x10010
 ; GFX12-NEXT:    s_or_b32 s3, s0, 0x400000
 ; GFX12-NEXT:    s_wait_alu 0xfffe
@@ -1161,13 +1165,14 @@ define amdgpu_ps i32 @s_sitofp_i1_to_bf16(i1 inreg %num) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, s0
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_bfe_u32 s1, s0, 0x10010
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_add_i32 s1, s1, s0
 ; GFX11-NEXT:    s_bitset1_b32 s0, 22
 ; GFX11-NEXT:    s_addk_i32 s1, 0x7fff
 ; GFX11-NEXT:    s_and_b32 s2, vcc_lo, exec_lo
 ; GFX11-NEXT:    s_cselect_b32 s0, s0, s1
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_ashr_i32 s0, s0, 16
 ; GFX11-NEXT:    ; return to shader part epilog
 ;
@@ -1178,6 +1183,7 @@ define amdgpu_ps i32 @s_sitofp_i1_to_bf16(i1 inreg %num) {
 ; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, s0
 ; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12-NEXT:    s_bfe_u32 s1, s0, 0x10010
 ; GFX12-NEXT:    s_or_b32 s2, s0, 0x400000
 ; GFX12-NEXT:    s_wait_alu 0xfffe
@@ -1358,10 +1364,11 @@ define amdgpu_ps <2 x i32> @s_sitofp_v2i1_to_v2bf16(<2 x i1> inreg %num) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, -1.0, s0
 ; GFX11-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX11-NEXT:    v_cmp_u_f32_e64 s0, v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
 ; GFX11-NEXT:    s_bfe_u32 s3, s1, 0x10010
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_add_i32 s3, s3, s1
 ; GFX11-NEXT:    s_bitset1_b32 s1, 22
 ; GFX11-NEXT:    s_addk_i32 s3, 0x7fff
@@ -1391,6 +1398,7 @@ define amdgpu_ps <2 x i32> @s_sitofp_v2i1_to_v2bf16(<2 x i1> inreg %num) {
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX12-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX12-NEXT:    v_readfirstlane_b32 s0, v1
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12-NEXT:    s_bfe_u32 s1, s0, 0x10010
 ; GFX12-NEXT:    s_or_b32 s3, s0, 0x400000
 ; GFX12-NEXT:    s_wait_alu 0xfffe

llvm-ci · 2025-03-12T21:13:52Z

LLVM Buildbot has detected a new failure on builder openmp-offload-libc-amdgpu-runtime running on omp-vega20-1 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/14480

Here is the relevant piece of the build log for the reference

Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libomp :: tasking/issue-94260-2.c' FAILED ********************
Exit Code: -11

Command Output (stdout):
--
# RUN: at line 1
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/clang -fopenmp   -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/openmp/runtime/test -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -fno-omit-frame-pointer -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/openmp/runtime/test/ompt /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic && /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# executed command: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/./bin/clang -fopenmp -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/openmp/runtime/test -L /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -fno-omit-frame-pointer -I /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/openmp/runtime/test/ompt /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic
# executed command: /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# note: command had no output on stdout or stderr
# error: command failed with exit status: -11

--

********************

This is a NFC patch llvm#103366 hit a buildbot failure with i1-to-bf16.ll. Update the test to fix the build. Also remove duplicated comments added in llvm#103366

quick fix for build

3e41283

broxigarchen marked this pull request as ready for review March 12, 2025 20:56

llvmbot added the backend:AMDGPU label Mar 12, 2025

broxigarchen mentioned this pull request Mar 12, 2025

[AMDGPU][True16] added Pre-RA hint to improve copy elimination #103366

Merged

broxigarchen merged commit c8047c6 into llvm:main Mar 12, 2025
10 of 14 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU][True16][CodeGen] update test for buildbot failure #131028

[AMDGPU][True16][CodeGen] update test for buildbot failure #131028

Uh oh!

broxigarchen commented Mar 12, 2025

Uh oh!

llvmbot commented Mar 12, 2025

Uh oh!

Uh oh!

llvm-ci commented Mar 12, 2025

Uh oh!

Uh oh!

[AMDGPU][True16][CodeGen] update test for buildbot failure #131028

[AMDGPU][True16][CodeGen] update test for buildbot failure #131028

Uh oh!

Conversation

broxigarchen commented Mar 12, 2025

Uh oh!

llvmbot commented Mar 12, 2025

Uh oh!

Uh oh!

llvm-ci commented Mar 12, 2025

Uh oh!

Uh oh!