Skip to content

Commit 9525841

Browse files
authored
[AMDGPU] Use AMDGPU::isIntrinsicAlwaysUniform in isSDNodeAlwaysUniform (#87085)
This is mostly just a simplification, but tests show a slight codegen improvement in code using the deprecated amdgcn.icmp/fcmp intrinsics.
1 parent df54f62 commit 9525841

File tree

2 files changed

+13
-28
lines changed

2 files changed

+13
-28
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -851,12 +851,7 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
851851
return true;
852852
case ISD::INTRINSIC_WO_CHAIN: {
853853
unsigned IntrID = N->getConstantOperandVal(0);
854-
switch (IntrID) {
855-
case Intrinsic::amdgcn_readfirstlane:
856-
case Intrinsic::amdgcn_readlane:
857-
return true;
858-
}
859-
return false;
854+
return AMDGPU::isIntrinsicAlwaysUniform(IntrID);
860855
}
861856
case ISD::LOAD:
862857
if (cast<LoadSDNode>(N)->getMemOperand()->getAddrSpace() ==

llvm/test/CodeGen/AMDGPU/wave32.ll

Lines changed: 12 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -2479,8 +2479,7 @@ define amdgpu_kernel void @icmp64(i32 %n, i32 %s) {
24792479
; GFX1032-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
24802480
; GFX1032-NEXT: v_cvt_u32_f32_e32 v1, v1
24812481
; GFX1032-NEXT: v_mul_lo_u32 v2, s1, v1
2482-
; GFX1032-NEXT: s_ff1_i32_b32 s1, 0x80000000
2483-
; GFX1032-NEXT: s_add_i32 s1, s1, 32
2482+
; GFX1032-NEXT: s_brev_b32 s1, 1
24842483
; GFX1032-NEXT: v_mul_hi_u32 v2, v1, v2
24852484
; GFX1032-NEXT: v_add_nc_u32_e32 v1, v1, v2
24862485
; GFX1032-NEXT: v_mul_hi_u32 v1, v0, v1
@@ -2494,8 +2493,7 @@ define amdgpu_kernel void @icmp64(i32 %n, i32 %s) {
24942493
; GFX1032-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
24952494
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
24962495
; GFX1032-NEXT: s_lshr_b32 s0, vcc_lo, 1
2497-
; GFX1032-NEXT: s_ff1_i32_b32 s0, s0
2498-
; GFX1032-NEXT: s_min_u32 s0, s0, s1
2496+
; GFX1032-NEXT: s_ff1_i32_b64 s0, s[0:1]
24992497
; GFX1032-NEXT: s_cmp_gt_u32 s0, 9
25002498
; GFX1032-NEXT: s_cselect_b32 s0, -1, 0
25012499
; GFX1032-NEXT: s_and_b32 s0, vcc_lo, s0
@@ -2529,10 +2527,7 @@ define amdgpu_kernel void @icmp64(i32 %n, i32 %s) {
25292527
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
25302528
; GFX1064-NEXT: s_lshr_b64 s[0:1], vcc, 1
25312529
; GFX1064-NEXT: s_bitset1_b32 s1, 31
2532-
; GFX1064-NEXT: s_ff1_i32_b32 s0, s0
2533-
; GFX1064-NEXT: s_ff1_i32_b32 s1, s1
2534-
; GFX1064-NEXT: s_add_i32 s1, s1, 32
2535-
; GFX1064-NEXT: s_min_u32 s0, s0, s1
2530+
; GFX1064-NEXT: s_ff1_i32_b64 s0, s[0:1]
25362531
; GFX1064-NEXT: s_cmp_gt_u32 s0, 9
25372532
; GFX1064-NEXT: s_cselect_b64 s[0:1], -1, 0
25382533
; GFX1064-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
@@ -2576,9 +2571,8 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
25762571
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
25772572
; GFX1032-NEXT: v_div_scale_f32 v1, s1, s0, s0, v0
25782573
; GFX1032-NEXT: v_div_scale_f32 v4, vcc_lo, v0, s0, v0
2579-
; GFX1032-NEXT: s_ff1_i32_b32 s1, 0x80000000
2574+
; GFX1032-NEXT: s_brev_b32 s1, 1
25802575
; GFX1032-NEXT: v_rcp_f32_e32 v2, v1
2581-
; GFX1032-NEXT: s_add_i32 s1, s1, 32
25822576
; GFX1032-NEXT: v_fma_f32 v3, -v1, v2, 1.0
25832577
; GFX1032-NEXT: v_fmac_f32_e32 v2, v3, v2
25842578
; GFX1032-NEXT: v_mul_f32_e32 v3, v4, v2
@@ -2592,8 +2586,7 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
25922586
; GFX1032-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
25932587
; GFX1032-NEXT: s_lshr_b32 s0, vcc_lo, 1
25942588
; GFX1032-NEXT: v_cmp_nlg_f32_e32 vcc_lo, 0, v0
2595-
; GFX1032-NEXT: s_ff1_i32_b32 s0, s0
2596-
; GFX1032-NEXT: s_min_u32 s0, s0, s1
2589+
; GFX1032-NEXT: s_ff1_i32_b64 s0, s[0:1]
25972590
; GFX1032-NEXT: s_cmp_gt_u32 s0, 9
25982591
; GFX1032-NEXT: s_cselect_b32 s0, -1, 0
25992592
; GFX1032-NEXT: s_and_b32 s0, vcc_lo, s0
@@ -2609,26 +2602,23 @@ define amdgpu_kernel void @fcmp64(float %n, float %s) {
26092602
; GFX1064-NEXT: v_cvt_f32_u32_e32 v0, v0
26102603
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
26112604
; GFX1064-NEXT: v_div_scale_f32 v1, s[0:1], s2, s2, v0
2612-
; GFX1064-NEXT: v_div_scale_f32 v4, vcc, v0, s2, v0
26132605
; GFX1064-NEXT: v_rcp_f32_e32 v2, v1
26142606
; GFX1064-NEXT: v_fma_f32 v3, -v1, v2, 1.0
26152607
; GFX1064-NEXT: v_fmac_f32_e32 v2, v3, v2
2616-
; GFX1064-NEXT: v_mul_f32_e32 v3, v4, v2
2617-
; GFX1064-NEXT: v_fma_f32 v5, -v1, v3, v4
2618-
; GFX1064-NEXT: v_fmac_f32_e32 v3, v5, v2
2619-
; GFX1064-NEXT: v_fma_f32 v1, -v1, v3, v4
2620-
; GFX1064-NEXT: v_div_fmas_f32 v1, v1, v2, v3
2608+
; GFX1064-NEXT: v_div_scale_f32 v3, vcc, v0, s2, v0
2609+
; GFX1064-NEXT: v_mul_f32_e32 v4, v3, v2
2610+
; GFX1064-NEXT: v_fma_f32 v5, -v1, v4, v3
2611+
; GFX1064-NEXT: v_fmac_f32_e32 v4, v5, v2
2612+
; GFX1064-NEXT: v_fma_f32 v1, -v1, v4, v3
2613+
; GFX1064-NEXT: v_div_fmas_f32 v1, v1, v2, v4
26212614
; GFX1064-NEXT: v_div_fixup_f32 v1, v1, s2, v0
26222615
; GFX1064-NEXT: v_trunc_f32_e32 v1, v1
26232616
; GFX1064-NEXT: v_fma_f32 v0, -v1, s2, v0
26242617
; GFX1064-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
26252618
; GFX1064-NEXT: s_lshr_b64 s[0:1], vcc, 1
26262619
; GFX1064-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v0
26272620
; GFX1064-NEXT: s_bitset1_b32 s1, 31
2628-
; GFX1064-NEXT: s_ff1_i32_b32 s0, s0
2629-
; GFX1064-NEXT: s_ff1_i32_b32 s1, s1
2630-
; GFX1064-NEXT: s_add_i32 s1, s1, 32
2631-
; GFX1064-NEXT: s_min_u32 s0, s0, s1
2621+
; GFX1064-NEXT: s_ff1_i32_b64 s0, s[0:1]
26322622
; GFX1064-NEXT: s_cmp_gt_u32 s0, 9
26332623
; GFX1064-NEXT: s_cselect_b64 s[0:1], -1, 0
26342624
; GFX1064-NEXT: s_and_b64 s[0:1], vcc, s[0:1]

0 commit comments

Comments
 (0)