Skip to content

Commit 976c37e

Browse files
amansharma612 and Aman Sharma authored
[AMDGPU] Set hasSideEffects=0 for SALU pseudos (#134487)
Fixes #128685. Co-authored-by: Aman Sharma <[email protected]>
1 parent 5a41fc2 commit 976c37e

File tree

3 files changed

+30
-26
lines changed

3 files changed

+30
-26
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ def EXIT_STRICT_WQM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
231231
let mayStore = 0;
232232
}
233233

234-
let usesCustomInserter = 1 in {
234+
let usesCustomInserter = 1, hasSideEffects = 0 in {
235235
let WaveSizePredicate = isWave32 in
236236
def S_INVERSE_BALLOT_U32 : SPseudoInstSI<
237237
(outs SReg_32:$sdst), (ins SSrc_b32:$mask),
@@ -243,7 +243,7 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
243243
(outs SReg_64:$sdst), (ins SSrc_b64:$mask),
244244
[(set i1:$sdst, (int_amdgcn_inverse_ballot i64:$mask))]
245245
>;
246-
} // End usesCustomInserter = 1
246+
} // End usesCustomInserter = 1, hasSideEffects = 0
247247

248248
let WaveSizePredicate = isWave32 in
249249
def : GCNPat <
@@ -338,21 +338,23 @@ def S_SUB_U64_PSEUDO : SPseudoInstSI <
338338
[(set SReg_64:$sdst, (UniformBinFrag<sub> i64:$src0, i64:$src1))]
339339
>;
340340

341-
def S_ADD_CO_PSEUDO : SPseudoInstSI <
342-
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in)
343-
>;
341+
let hasSideEffects = 0 in {
342+
def S_ADD_CO_PSEUDO : SPseudoInstSI <
343+
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in)
344+
>;
344345

345-
def S_SUB_CO_PSEUDO : SPseudoInstSI <
346-
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in)
347-
>;
346+
def S_SUB_CO_PSEUDO : SPseudoInstSI <
347+
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in)
348+
>;
348349

349-
def S_UADDO_PSEUDO : SPseudoInstSI <
350-
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1)
351-
>;
350+
def S_UADDO_PSEUDO : SPseudoInstSI <
351+
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1)
352+
>;
352353

353-
def S_USUBO_PSEUDO : SPseudoInstSI <
354-
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1)
355-
>;
354+
def S_USUBO_PSEUDO : SPseudoInstSI <
355+
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1)
356+
>;
357+
}
356358

357359
let OtherPredicates = [HasShaderCyclesHiLoRegisters] in
358360
def GET_SHADERCYCLESHILO : SPseudoInstSI<

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -663,14 +663,16 @@ let SubtargetPredicate = isGFX12Plus in {
663663
}
664664

665665
// The higher 32-bits of the inputs contain the sign extension bits.
666-
def S_MUL_I64_I32_PSEUDO : SPseudoInstSI <
667-
(outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
668-
>;
666+
let hasSideEffects = 0 in {
667+
def S_MUL_I64_I32_PSEUDO : SPseudoInstSI <
668+
(outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
669+
>;
669670

670-
// The higher 32-bits of the inputs are zero.
671-
def S_MUL_U64_U32_PSEUDO : SPseudoInstSI <
672-
(outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
673-
>;
671+
// The higher 32-bits of the inputs are zero.
672+
def S_MUL_U64_U32_PSEUDO : SPseudoInstSI <
673+
(outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
674+
>;
675+
}
674676

675677
} // End SubtargetPredicate = isGFX12Plus
676678

llvm/test/CodeGen/AMDGPU/mul.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2850,17 +2850,17 @@ define amdgpu_kernel void @s_mul_i128(ptr addrspace(1) %out, [8 x i32], i128 %a,
28502850
; GFX12-NEXT: s_mov_b32 s5, s3
28512851
; GFX12-NEXT: s_mov_b32 s17, s3
28522852
; GFX12-NEXT: s_mov_b32 s19, s3
2853-
; GFX12-NEXT: s_mov_b32 s24, s3
2853+
; GFX12-NEXT: s_mov_b32 s20, s3
28542854
; GFX12-NEXT: s_wait_kmcnt 0x0
28552855
; GFX12-NEXT: s_mov_b32 s2, s8
28562856
; GFX12-NEXT: s_mov_b32 s6, s12
28572857
; GFX12-NEXT: s_mov_b32 s4, s13
28582858
; GFX12-NEXT: s_mul_u64 s[22:23], s[6:7], s[2:3]
2859-
; GFX12-NEXT: s_mul_u64 s[20:21], s[4:5], s[2:3]
2859+
; GFX12-NEXT: s_mul_u64 s[24:25], s[4:5], s[2:3]
28602860
; GFX12-NEXT: s_mov_b32 s2, s23
28612861
; GFX12-NEXT: s_mov_b32 s16, s9
28622862
; GFX12-NEXT: s_mul_u64 s[10:11], s[10:11], s[12:13]
2863-
; GFX12-NEXT: s_add_nc_u64 s[12:13], s[20:21], s[2:3]
2863+
; GFX12-NEXT: s_add_nc_u64 s[12:13], s[24:25], s[2:3]
28642864
; GFX12-NEXT: s_mul_u64 s[6:7], s[6:7], s[16:17]
28652865
; GFX12-NEXT: s_mov_b32 s2, s13
28662866
; GFX12-NEXT: s_mov_b32 s13, s3
@@ -2871,9 +2871,9 @@ define amdgpu_kernel void @s_mul_i128(ptr addrspace(1) %out, [8 x i32], i128 %a,
28712871
; GFX12-NEXT: s_mov_b32 s23, s3
28722872
; GFX12-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[18:19]
28732873
; GFX12-NEXT: s_add_nc_u64 s[8:9], s[10:11], s[8:9]
2874-
; GFX12-NEXT: s_mov_b32 s25, s6
2874+
; GFX12-NEXT: s_mov_b32 s21, s6
28752875
; GFX12-NEXT: s_add_nc_u64 s[2:3], s[4:5], s[2:3]
2876-
; GFX12-NEXT: s_or_b64 s[6:7], s[22:23], s[24:25]
2876+
; GFX12-NEXT: s_or_b64 s[6:7], s[22:23], s[20:21]
28772877
; GFX12-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[8:9]
28782878
; GFX12-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
28792879
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3

0 commit comments

Comments
 (0)