Skip to content

AMDGPU: Switch simplifydemandedbits-recursion.ll to generated checks #131317

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Mar 14, 2025

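Previously this test only checked for the final `s_endpgm`. Switch it to full checks generated by update_llc_test_checks.py, and replace the `undef` operands in the IR with concrete values.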

Copy link
Contributor Author

arsenm commented Mar 14, 2025

This stack of pull requests is managed by Graphite. Learn more about stacking.

@llvmbot
Copy link
Member

llvmbot commented Mar 14, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

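Previously this test only checked for the final `s_endpgm`. Switch it to full checks generated by update_llc_test_checks.py, and replace the `undef` operands in the IR with concrete values.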


Full diff: https://github.com/llvm/llvm-project/pull/131317.diff

1 File Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll (+71-8)
diff --git a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
index 55b4d12805926..a5299ea36958d 100644
--- a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn < %s | FileCheck %s
 
 ; Check we can compile this bugpoint-reduced test without an
@@ -9,17 +10,79 @@
 
 @0 = external unnamed_addr addrspace(3) global [462 x float], align 4
 
-; Function Attrs: nounwind readnone speculatable
 declare i32 @llvm.amdgcn.workitem.id.y() #0
-
-; Function Attrs: nounwind readnone speculatable
 declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-; Function Attrs: nounwind readnone speculatable
 declare float @llvm.fmuladd.f32(float, float, float) #0
 
-; CHECK: s_endpgm
 define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) local_unnamed_addr !reqd_work_group_size !0 {
+; CHECK-LABEL: foo:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    s_load_dword s6, s[4:5], 0x10
+; CHECK-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x10
+; CHECK-NEXT:    s_load_dword s10, s[4:5], 0x11
+; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; CHECK-NEXT:    s_movk_i32 s0, 0x54
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    v_mad_u32_u24 v1, v1, s0, v2
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_bitcmp1_b32 s6, 8
+; CHECK-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; CHECK-NEXT:    s_bitcmp1_b32 s6, 16
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; CHECK-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 1, v2
+; CHECK-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
+; CHECK-NEXT:    s_bitcmp1_b32 s2, 24
+; CHECK-NEXT:    s_cselect_b64 s[6:7], -1, 0
+; CHECK-NEXT:    s_xor_b64 s[6:7], s[6:7], -1
+; CHECK-NEXT:    s_bitcmp1_b32 s3, 0
+; CHECK-NEXT:    s_cselect_b64 s[8:9], -1, 0
+; CHECK-NEXT:    s_bitcmp1_b32 s10, 8
+; CHECK-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; CHECK-NEXT:    s_and_b64 s[2:3], exec, s[6:7]
+; CHECK-NEXT:    s_and_b64 s[4:5], exec, s[4:5]
+; CHECK-NEXT:    s_and_b64 s[6:7], exec, s[10:11]
+; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
+; CHECK-NEXT:    s_mov_b32 m0, -1
+; CHECK-NEXT:  .LBB0_1: ; %.loopexit145
+; CHECK-NEXT:    ; =>This Loop Header: Depth=1
+; CHECK-NEXT:    ; Child Loop BB0_3 Depth 2
+; CHECK-NEXT:    ; Child Loop BB0_4 Depth 3
+; CHECK-NEXT:    ; Child Loop BB0_5 Depth 2
+; CHECK-NEXT:    v_mov_b32_e32 v2, v1
+; CHECK-NEXT:    s_branch .LBB0_3
+; CHECK-NEXT:  .LBB0_2: ; %.loopexit
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=2
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0x540, v2
+; CHECK-NEXT:    s_mov_b64 vcc, s[4:5]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_5
+; CHECK-NEXT:  .LBB0_3: ; %bb13
+; CHECK-NEXT:    ; Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    ; => This Loop Header: Depth=2
+; CHECK-NEXT:    ; Child Loop BB0_4 Depth 3
+; CHECK-NEXT:    s_and_b64 vcc, exec, s[0:1]
+; CHECK-NEXT:    v_mov_b32_e32 v3, v2
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_2
+; CHECK-NEXT:  .LBB0_4: ; %bb21
+; CHECK-NEXT:    ; Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    ; Parent Loop BB0_3 Depth=2
+; CHECK-NEXT:    ; => This Inner Loop Header: Depth=3
+; CHECK-NEXT:    ds_write_b32 v3, v0
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 32, v3
+; CHECK-NEXT:    s_mov_b64 vcc, s[2:3]
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_4
+; CHECK-NEXT:    s_branch .LBB0_2
+; CHECK-NEXT:  .LBB0_5: ; %bb31
+; CHECK-NEXT:    ; Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    s_mov_b64 vcc, s[6:7]
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_5
+; CHECK-NEXT:  ; %bb.6: ; %bb30
+; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    s_mov_b64 vcc, s[8:9]
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_1
+; CHECK-NEXT:  ; %bb.7: ; %bb11
+; CHECK-NEXT:    s_endpgm
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.y()
   %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -47,7 +110,7 @@ bb13:                                             ; preds = %.loopexit, %.loopex
 
 bb17:                                             ; preds = %bb13
   %tmp18 = mul i32 %tmp15, 224
-  %tmp19 = add i32 undef, %tmp18
+  %tmp19 = add i32 0, %tmp18
   br label %bb21
 
 .loopexit:                                        ; preds = %bb21, %bb13
@@ -58,7 +121,7 @@ bb21:                                             ; preds = %bb21, %bb17
   %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ]
   %tmp23 = add i32 %tmp22, %tmp16
   %tmp24 = getelementptr inbounds float, ptr addrspace(3) @0, i32 %tmp23
-  store float undef, ptr addrspace(3) %tmp24, align 4
+  store float 0.0, ptr addrspace(3) %tmp24, align 4
   %tmp25 = add nuw i32 %tmp22, 8
   br i1 %c3, label %bb21, label %.loopexit
 

@arsenm arsenm force-pushed the users/arsenm/amdgpu/switch-scheduler-subrange-crash-generated-checks branch from a19e408 to 9ce83f0 Compare March 14, 2025 13:27
@arsenm arsenm force-pushed the users/arsenm/amdgpu/swithc-simplifydemandedbits-recursion-generated-checks branch from f20fb4d to 0bccd45 Compare March 14, 2025 13:27
@arsenm arsenm force-pushed the users/arsenm/amdgpu/switch-scheduler-subrange-crash-generated-checks branch from 9ce83f0 to 69d40c9 Compare March 14, 2025 17:32
@arsenm arsenm force-pushed the users/arsenm/amdgpu/swithc-simplifydemandedbits-recursion-generated-checks branch from 0bccd45 to c64c89f Compare March 14, 2025 17:32
Copy link
Contributor Author

arsenm commented Mar 17, 2025

Merge activity

  • Mar 17, 4:47 AM EDT: A user started a stack merge that includes this pull request via Graphite.
  • Mar 17, 5:07 AM EDT: Graphite rebased this pull request as part of a merge.
  • Mar 17, 5:10 AM EDT: A user merged this pull request with Graphite.

@arsenm arsenm force-pushed the users/arsenm/amdgpu/switch-scheduler-subrange-crash-generated-checks branch from 69d40c9 to 4d17b47 Compare March 17, 2025 09:04
Base automatically changed from users/arsenm/amdgpu/switch-scheduler-subrange-crash-generated-checks to main March 17, 2025 09:07
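Previously this test only checked for the final `s_endpgm`. Switch it to full checks generated by update_llc_test_checks.py, and replace the `undef` operands in the IR with concrete values.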
@arsenm arsenm force-pushed the users/arsenm/amdgpu/swithc-simplifydemandedbits-recursion-generated-checks branch from c64c89f to 975348f Compare March 17, 2025 09:07
@arsenm arsenm merged commit 1f1f820 into main Mar 17, 2025
6 of 9 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/swithc-simplifydemandedbits-recursion-generated-checks branch March 17, 2025 09:10
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants