Skip to content

Commit 8d5849c

Browse files
committed
[AMDGPU] SIFixSGPRCopies should not reprocess V2S copies created by PHI preprocessing
1 parent 29aa9d0 commit 8d5849c

File tree

3 files changed: 38 additions (+38) and 23 deletions (-23).

llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ class SIFixSGPRCopies {
127127
unsigned NextVGPRToSGPRCopyID = 0;
128128
MapVector<unsigned, V2SCopyInfo> V2SCopies;
129129
DenseMap<MachineInstr *, SetVector<unsigned>> SiblingPenalty;
130+
SmallSet<MachineInstr *, 4> PHISources;
130131

131132
public:
132133
MachineRegisterInfo *MRI;
@@ -692,6 +693,7 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
692693
.addReg(MO.getReg());
693694
MO.setReg(NewDst);
694695
analyzeVGPRToSGPRCopy(NewCopy);
696+
PHISources.insert(NewCopy);
695697
}
696698
}
697699
}
@@ -798,6 +800,7 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
798800
RegSequences.clear();
799801
PHINodes.clear();
800802
S2VCopies.clear();
803+
PHISources.clear();
801804

802805
return true;
803806
}
@@ -923,6 +926,8 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
923926
}
924927

925928
void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(MachineInstr* MI) {
929+
if (PHISources.contains(MI))
930+
return;
926931
Register DstReg = MI->getOperand(0).getReg();
927932
const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
928933

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,19 +1114,23 @@ define amdgpu_kernel void @f64_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1)
11141114
; GCN-NEXT: s_waitcnt lgkmcnt(0)
11151115
; GCN-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
11161116
; GCN-NEXT: s_mov_b32 s3, 0xf000
1117-
; GCN-NEXT: s_mov_b32 s2, -1
11181117
; GCN-NEXT: s_waitcnt lgkmcnt(0)
11191118
; GCN-NEXT: v_add_f64 v[0:1], s[4:5], 1.0
1120-
; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v1
1121-
; GCN-NEXT: v_add_i32_e32 v1, vcc, 2, v1
1122-
; GCN-NEXT: v_and_b32_e32 v3, 0xffff0000, v0
1123-
; GCN-NEXT: v_add_i32_e32 v0, vcc, 2, v0
1124-
; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
1125-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
1126-
; GCN-NEXT: v_or_b32_e32 v1, v2, v1
1127-
; GCN-NEXT: v_or_b32_e32 v0, v3, v0
1128-
; GCN-NEXT: v_add_i32_e32 v1, vcc, 0x20000, v1
1129-
; GCN-NEXT: v_add_i32_e32 v0, vcc, 0x20000, v0
1119+
; GCN-NEXT: v_readfirstlane_b32 s2, v0
1120+
; GCN-NEXT: v_readfirstlane_b32 s4, v1
1121+
; GCN-NEXT: s_and_b32 s5, s4, 0xffff0000
1122+
; GCN-NEXT: s_add_i32 s4, s4, 2
1123+
; GCN-NEXT: s_and_b32 s6, s2, 0xffff0000
1124+
; GCN-NEXT: s_add_i32 s2, s2, 2
1125+
; GCN-NEXT: s_and_b32 s4, s4, 0xffff
1126+
; GCN-NEXT: s_and_b32 s2, s2, 0xffff
1127+
; GCN-NEXT: s_or_b32 s4, s5, s4
1128+
; GCN-NEXT: s_or_b32 s2, s6, s2
1129+
; GCN-NEXT: s_add_i32 s4, s4, 0x20000
1130+
; GCN-NEXT: s_add_i32 s5, s2, 0x20000
1131+
; GCN-NEXT: s_mov_b32 s2, -1
1132+
; GCN-NEXT: v_mov_b32_e32 v0, s5
1133+
; GCN-NEXT: v_mov_b32_e32 v1, s4
11301134
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
11311135
; GCN-NEXT: s_endpgm
11321136
;
@@ -1139,14 +1143,20 @@ define amdgpu_kernel void @f64_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1)
11391143
; VI-NEXT: v_mov_b32_e32 v3, s1
11401144
; VI-NEXT: s_waitcnt lgkmcnt(0)
11411145
; VI-NEXT: v_add_f64 v[0:1], s[2:3], 1.0
1142-
; VI-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
1143-
; VI-NEXT: v_add_u32_e32 v0, vcc, 2, v0
1144-
; VI-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
1145-
; VI-NEXT: v_add_u32_e32 v1, vcc, 2, v1
1146-
; VI-NEXT: v_or_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1147-
; VI-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1148-
; VI-NEXT: v_add_u32_e32 v1, vcc, 0x20000, v1
1149-
; VI-NEXT: v_add_u32_e32 v0, vcc, 0x20000, v0
1146+
; VI-NEXT: v_readfirstlane_b32 s0, v1
1147+
; VI-NEXT: v_readfirstlane_b32 s1, v0
1148+
; VI-NEXT: s_and_b32 s2, s1, 0xffff0000
1149+
; VI-NEXT: s_add_i32 s1, s1, 2
1150+
; VI-NEXT: s_and_b32 s3, s0, 0xffff0000
1151+
; VI-NEXT: s_add_i32 s0, s0, 2
1152+
; VI-NEXT: s_and_b32 s0, s0, 0xffff
1153+
; VI-NEXT: s_and_b32 s1, s1, 0xffff
1154+
; VI-NEXT: s_or_b32 s0, s3, s0
1155+
; VI-NEXT: s_or_b32 s1, s2, s1
1156+
; VI-NEXT: s_add_i32 s0, s0, 0x20000
1157+
; VI-NEXT: s_add_i32 s1, s1, 0x20000
1158+
; VI-NEXT: v_mov_b32_e32 v0, s1
1159+
; VI-NEXT: v_mov_b32_e32 v1, s0
11501160
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
11511161
; VI-NEXT: s_endpgm
11521162
;

llvm/test/CodeGen/AMDGPU/idiv-licm.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -712,8 +712,8 @@ define amdgpu_kernel void @udiv16_invariant_denom(ptr addrspace(1) nocapture %ar
712712
; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
713713
; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
714714
; GFX11-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v2|, v0
715-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
716-
; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v3, vcc_lo
715+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
716+
; GFX11-NEXT: v_add_co_ci_u32_e64 v2, null, 0, v3, vcc_lo
717717
; GFX11-NEXT: global_store_b16 v4, v2, s[0:1]
718718
; GFX11-NEXT: s_cbranch_scc0 .LBB4_1
719719
; GFX11-NEXT: ; %bb.2: ; %bb2
@@ -824,9 +824,9 @@ define amdgpu_kernel void @urem16_invariant_denom(ptr addrspace(1) nocapture %ar
824824
; GFX11-NEXT: v_trunc_f32_e32 v3, v3
825825
; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
826826
; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
827-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
827+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
828828
; GFX11-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v2|, v0
829-
; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v3, vcc_lo
829+
; GFX11-NEXT: v_add_co_ci_u32_e64 v2, null, 0, v3, vcc_lo
830830
; GFX11-NEXT: v_mov_b32_e32 v3, s5
831831
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
832832
; GFX11-NEXT: v_mul_lo_u32 v2, v2, s2

0 commit comments

Comments
 (0)