Skip to content

Commit 76d9ae9

Browse files
committed
[AMDGPU] avoid blindly converting REG_SEQUENCE and PHIs to VALU
In 2e29b01 we introduced a specific solving algorithm that analyzes the use chains of VGPR to SGPR copies and either lowers the copy to v_readfirstlane_b32 or converts the whole chain to VALU forms. At the same time, we still have the code that blindly converts REG_SEQUENCE and PHIs to VALU in case they produce an SGPR but have VGPR input operands. In case the REG_SEQUENCE and PHIs are in the VGPR to SGPR copy use chain, and this chain was considered long enough to convert the copy to v_readfirstlane_b32, further lowering them to VALU leads to several kinds of issues. First, we end up with a v_readfirstlane_b32 which is completely useless because most parts of its use chain were moved to VALU forms. Second, we may encounter subtle bugs related to the EXEC-dependent CF because of the weird mixing of SALU and VALU instructions. This change removes the code that moves REG_SEQUENCE and PHIs to VALU. Instead, we use the fact that both REG_SEQUENCE and PHIs have copy semantics. That is, if they define an SGPR but have VGPR inputs, we insert VGPR to SGPR copies to make them pure SGPR. Then, the new copies are processed by the common VGPR to SGPR lowering algorithm. This is Part 2 in the series of commits aiming at the massive refactoring of the SIFixSGPRCopies pass. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D130367
1 parent 3cc3be8 commit 76d9ae9

15 files changed

+960
-621
lines changed

llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

Lines changed: 177 additions & 198 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Lines changed: 51 additions & 51 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@ define amdgpu_kernel void @test_loop(float addrspace(3)* %ptr, i32 %n) nounwind
1313
; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
1414
; GCN-NEXT: s_waitcnt lgkmcnt(0)
1515
; GCN-NEXT: s_addk_i32 s0, 0x80
16-
; GCN-NEXT: v_mov_b32_e32 v0, s0
17-
; GCN-NEXT: s_and_b64 s[0:1], exec, -1
16+
; GCN-NEXT: s_and_b64 vcc, exec, -1
1817
; GCN-NEXT: s_mov_b32 m0, -1
1918
; GCN-NEXT: .LBB0_2: ; %for.body
2019
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
20+
; GCN-NEXT: v_mov_b32_e32 v0, s0
2121
; GCN-NEXT: ds_read_b32 v1, v0
2222
; GCN-NEXT: s_waitcnt lgkmcnt(0)
2323
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
2424
; GCN-NEXT: ds_write_b32 v0, v1
25-
; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0
26-
; GCN-NEXT: s_mov_b64 vcc, s[0:1]
25+
; GCN-NEXT: s_add_i32 s0, s0, 4
26+
; GCN-NEXT: s_mov_b64 vcc, vcc
2727
; GCN-NEXT: s_cbranch_vccnz .LBB0_2
2828
; GCN-NEXT: .LBB0_3: ; %for.exit
2929
; GCN-NEXT: s_endpgm
@@ -92,15 +92,15 @@ define amdgpu_kernel void @loop_const_true(float addrspace(3)* %ptr, i32 %n) nou
9292
; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
9393
; GCN-NEXT: s_waitcnt lgkmcnt(0)
9494
; GCN-NEXT: s_addk_i32 s0, 0x80
95-
; GCN-NEXT: v_mov_b32_e32 v0, s0
9695
; GCN-NEXT: s_mov_b32 m0, -1
9796
; GCN-NEXT: .LBB1_1: ; %for.body
9897
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
98+
; GCN-NEXT: v_mov_b32_e32 v0, s0
9999
; GCN-NEXT: ds_read_b32 v1, v0
100100
; GCN-NEXT: s_waitcnt lgkmcnt(0)
101101
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
102102
; GCN-NEXT: ds_write_b32 v0, v1
103-
; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0
103+
; GCN-NEXT: s_add_i32 s0, s0, 4
104104
; GCN-NEXT: s_branch .LBB1_1
105105
;
106106
; GCN_DBG-LABEL: loop_const_true:
@@ -291,23 +291,23 @@ define amdgpu_kernel void @loop_arg_0(float addrspace(3)* %ptr, i32 %n) nounwind
291291
; GCN-NEXT: v_mov_b32_e32 v0, 0
292292
; GCN-NEXT: s_mov_b32 m0, -1
293293
; GCN-NEXT: ds_read_u8 v0, v0
294-
; GCN-NEXT: s_load_dword s2, s[0:1], 0x9
294+
; GCN-NEXT: s_load_dword s4, s[0:1], 0x9
295295
; GCN-NEXT: s_waitcnt lgkmcnt(0)
296296
; GCN-NEXT: v_readfirstlane_b32 s0, v0
297297
; GCN-NEXT: s_bitcmp1_b32 s0, 0
298298
; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
299-
; GCN-NEXT: s_addk_i32 s2, 0x80
300-
; GCN-NEXT: s_xor_b64 s[0:1], s[0:1], -1
301-
; GCN-NEXT: v_mov_b32_e32 v0, s2
302-
; GCN-NEXT: s_and_b64 s[0:1], exec, s[0:1]
299+
; GCN-NEXT: s_xor_b64 s[2:3], s[0:1], -1
300+
; GCN-NEXT: s_add_i32 s0, s4, 0x80
301+
; GCN-NEXT: s_and_b64 vcc, exec, s[2:3]
303302
; GCN-NEXT: .LBB4_1: ; %for.body
304303
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
304+
; GCN-NEXT: v_mov_b32_e32 v0, s0
305305
; GCN-NEXT: ds_read_b32 v1, v0
306306
; GCN-NEXT: s_waitcnt lgkmcnt(0)
307307
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
308308
; GCN-NEXT: ds_write_b32 v0, v1
309-
; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0
310-
; GCN-NEXT: s_mov_b64 vcc, s[0:1]
309+
; GCN-NEXT: s_add_i32 s0, s0, 4
310+
; GCN-NEXT: s_mov_b64 vcc, vcc
311311
; GCN-NEXT: s_cbranch_vccz .LBB4_1
312312
; GCN-NEXT: ; %bb.2: ; %for.exit
313313
; GCN-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/ds_read2.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,7 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out
979979
; CI-NEXT: s_mov_b32 s3, 0xf000
980980
; CI-NEXT: s_mov_b32 s2, -1
981981
; CI-NEXT: s_waitcnt lgkmcnt(0)
982-
; CI-NEXT: v_add_i32_e32 v0, vcc, v1, v0
982+
; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
983983
; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0
984984
; CI-NEXT: s_endpgm
985985
;

llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir

Lines changed: 463 additions & 223 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir

Lines changed: 118 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,8 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
12
# RUN: llc -march=amdgcn -run-pass si-fix-sgpr-copies,si-fold-operands,dead-mi-elimination -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
23

34
# Check that constant is in SGPR registers
45

5-
# GCN-LABEL: {{^}}name: const_to_sgpr{{$}}
6-
# GCN: %[[HI:[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
7-
# GCN-NEXT: %[[LO:[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1048576
8-
# GCN-NEXT: %[[SGPR_PAIR:[0-9]+]]:sreg_64 = REG_SEQUENCE killed %[[LO]], %subreg.sub0, killed %[[HI]], %subreg.sub1
9-
# GCN-NEXT: V_CMP_LT_U64_e64 killed %{{[0-9]+}}, %[[SGPR_PAIR]], implicit $exec
10-
11-
12-
# GCN-LABEL: {{^}}name: const_to_sgpr_multiple_use{{$}}
13-
# GCN: %[[HI:[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
14-
# GCN-NEXT: %[[LO:[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1048576
15-
# GCN-NEXT: %[[SGPR_PAIR:[0-9]+]]:sreg_64 = REG_SEQUENCE killed %[[LO]], %subreg.sub0, killed %[[HI]], %subreg.sub1
16-
# GCN-NEXT: V_CMP_LT_U64_e64 killed %{{[0-9]+}}, %[[SGPR_PAIR]], implicit $exec
17-
# GCN-NEXT: V_CMP_LT_U64_e64 killed %{{[0-9]+}}, %[[SGPR_PAIR]], implicit $exec
18-
19-
# GCN-LABEL: {{^}}name: const_to_sgpr_subreg{{$}}
20-
# GCN: %[[OP0:[0-9]+]]:vreg_64 = REG_SEQUENCE killed %{{[0-9]+}}, %subreg.sub0, killed %{{[0-9]+}}, %subreg.sub1
21-
# GCN-NEXT: V_CMP_LT_U32_e64 killed %[[OP0]].sub0, 12, implicit $exec
22-
236
--- |
247
define amdgpu_kernel void @const_to_sgpr(i32 addrspace(1)* nocapture %arg, i64 %id) {
258
bb:
@@ -99,6 +82,44 @@ liveins:
9982
- { reg: '$vgpr0', virtual-reg: '%2' }
10083
- { reg: '$sgpr0_sgpr1', virtual-reg: '%3' }
10184
body: |
85+
; GCN-LABEL: name: const_to_sgpr
86+
; GCN: bb.0.bb:
87+
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
88+
; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
89+
; GCN-NEXT: {{ $}}
90+
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
91+
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
92+
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0
93+
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 11, 0
94+
; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
95+
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
96+
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
97+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
98+
; GCN-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 [[REG_SEQUENCE]].sub0, [[S_LOAD_DWORDX2_IMM1]].sub0, implicit-def $scc
99+
; GCN-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32_xm0 = S_ADDC_U32 0, [[S_LOAD_DWORDX2_IMM1]].sub1, implicit-def dead $scc, implicit $scc
100+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_ADD_U32_]], %subreg.sub0, killed [[S_ADDC_U32_]], %subreg.sub1
101+
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1048576, implicit $exec
102+
; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
103+
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_MOV_B32_e32_]], %subreg.sub0, killed [[V_MOV_B32_e32_1]], %subreg.sub1
104+
; GCN-NEXT: [[V_CMP_LT_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U64_e64 killed [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], implicit $exec
105+
; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_U64_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
106+
; GCN-NEXT: S_BRANCH %bb.1
107+
; GCN-NEXT: {{ $}}
108+
; GCN-NEXT: bb.1.bb1:
109+
; GCN-NEXT: successors: %bb.2(0x80000000)
110+
; GCN-NEXT: {{ $}}
111+
; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[REG_SEQUENCE]], 2, implicit-def dead $scc
112+
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 61440
113+
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
114+
; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_2]], %subreg.sub0, killed [[S_MOV_B32_1]], %subreg.sub1
115+
; GCN-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub1_sub2_sub3_sub4_sub5, killed [[REG_SEQUENCE3]], %subreg.sub1_sub2_sub3_sub4_sub5_sub6
116+
; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
117+
; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_LSHL_B64_]]
118+
; GCN-NEXT: BUFFER_STORE_DWORD_ADDR64 killed [[V_MOV_B32_e32_2]], killed [[COPY3]], killed [[REG_SEQUENCE4]], 0, 0, 0, 0, 0, implicit $exec
119+
; GCN-NEXT: {{ $}}
120+
; GCN-NEXT: bb.2.bb2:
121+
; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
122+
; GCN-NEXT: S_ENDPGM 0
102123
bb.0.bb:
103124
successors: %bb.1.bb1(0x40000000), %bb.2.bb2(0x40000000)
104125
liveins: $vgpr0, $sgpr0_sgpr1
@@ -197,6 +218,50 @@ liveins:
197218
- { reg: '$vgpr0', virtual-reg: '%2' }
198219
- { reg: '$sgpr0_sgpr1', virtual-reg: '%3' }
199220
body: |
221+
; GCN-LABEL: name: const_to_sgpr_multiple_use
222+
; GCN: bb.0.bb:
223+
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
224+
; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
225+
; GCN-NEXT: {{ $}}
226+
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
227+
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
228+
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0
229+
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 11, 0
230+
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 13, 0
231+
; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
232+
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
233+
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
234+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
235+
; GCN-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 [[REG_SEQUENCE]].sub0, [[S_LOAD_DWORDX2_IMM1]].sub0, implicit-def $scc
236+
; GCN-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32_xm0 = S_ADDC_U32 0, [[S_LOAD_DWORDX2_IMM1]].sub1, implicit-def dead $scc, implicit $scc
237+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_ADD_U32_]], %subreg.sub0, killed [[S_ADDC_U32_]], %subreg.sub1
238+
; GCN-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 [[REG_SEQUENCE]].sub0, [[S_LOAD_DWORDX2_IMM2]].sub0, implicit-def $scc
239+
; GCN-NEXT: [[S_ADDC_U32_1:%[0-9]+]]:sreg_32_xm0 = S_ADDC_U32 0, [[S_LOAD_DWORDX2_IMM2]].sub1, implicit-def dead $scc, implicit $scc
240+
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_ADD_U32_1]], %subreg.sub0, killed [[S_ADDC_U32_1]], %subreg.sub1
241+
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1048576, implicit $exec
242+
; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
243+
; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_MOV_B32_e32_]], %subreg.sub0, killed [[V_MOV_B32_e32_1]], %subreg.sub1
244+
; GCN-NEXT: [[V_CMP_LT_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U64_e64 killed [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], implicit $exec
245+
; GCN-NEXT: [[V_CMP_LT_U64_e64_1:%[0-9]+]]:sreg_64 = V_CMP_LT_U64_e64 killed [[REG_SEQUENCE2]], [[REG_SEQUENCE3]], implicit $exec
246+
; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 killed [[V_CMP_LT_U64_e64_]], killed [[V_CMP_LT_U64_e64_1]], implicit-def dead $scc
247+
; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[S_AND_B64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
248+
; GCN-NEXT: S_BRANCH %bb.1
249+
; GCN-NEXT: {{ $}}
250+
; GCN-NEXT: bb.1.bb1:
251+
; GCN-NEXT: successors: %bb.2(0x80000000)
252+
; GCN-NEXT: {{ $}}
253+
; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[REG_SEQUENCE]], 2, implicit-def dead $scc
254+
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 61440
255+
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
256+
; GCN-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_2]], %subreg.sub0, killed [[S_MOV_B32_1]], %subreg.sub1
257+
; GCN-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub1_sub2_sub3_sub4_sub5, killed [[REG_SEQUENCE4]], %subreg.sub1_sub2_sub3_sub4_sub5_sub6
258+
; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
259+
; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_LSHL_B64_]]
260+
; GCN-NEXT: BUFFER_STORE_DWORD_ADDR64 killed [[V_MOV_B32_e32_2]], killed [[COPY3]], killed [[REG_SEQUENCE5]], 0, 0, 0, 0, 0, implicit $exec
261+
; GCN-NEXT: {{ $}}
262+
; GCN-NEXT: bb.2.bb2:
263+
; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
264+
; GCN-NEXT: S_ENDPGM 0
200265
bb.0.bb:
201266
successors: %bb.1.bb1(0x40000000), %bb.2.bb2(0x40000000)
202267
liveins: $vgpr0, $sgpr0_sgpr1
@@ -294,6 +359,41 @@ liveins:
294359
- { reg: '$vgpr0', virtual-reg: '%2' }
295360
- { reg: '$sgpr0_sgpr1', virtual-reg: '%3' }
296361
body: |
362+
; GCN-LABEL: name: const_to_sgpr_subreg
363+
; GCN: bb.0.bb:
364+
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
365+
; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
366+
; GCN-NEXT: {{ $}}
367+
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
368+
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
369+
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0
370+
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 11, 0
371+
; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
372+
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
373+
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
374+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
375+
; GCN-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 [[REG_SEQUENCE]].sub0, [[S_LOAD_DWORDX2_IMM1]].sub0, implicit-def $scc
376+
; GCN-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32_xm0 = S_ADDC_U32 0, [[S_LOAD_DWORDX2_IMM1]].sub1, implicit-def dead $scc, implicit $scc
377+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_ADD_U32_]], %subreg.sub0, killed [[S_ADDC_U32_]], %subreg.sub1
378+
; GCN-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 killed [[REG_SEQUENCE1]].sub0, 12, implicit $exec
379+
; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
380+
; GCN-NEXT: S_BRANCH %bb.1
381+
; GCN-NEXT: {{ $}}
382+
; GCN-NEXT: bb.1.bb1:
383+
; GCN-NEXT: successors: %bb.2(0x80000000)
384+
; GCN-NEXT: {{ $}}
385+
; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[REG_SEQUENCE]], 2, implicit-def dead $scc
386+
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 61440
387+
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
388+
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_2]], %subreg.sub0, killed [[S_MOV_B32_1]], %subreg.sub1
389+
; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub1_sub2_sub3_sub4_sub5, killed [[REG_SEQUENCE2]], %subreg.sub1_sub2_sub3_sub4_sub5_sub6
390+
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
391+
; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_LSHL_B64_]]
392+
; GCN-NEXT: BUFFER_STORE_DWORD_ADDR64 killed [[V_MOV_B32_e32_]], killed [[COPY3]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec
393+
; GCN-NEXT: {{ $}}
394+
; GCN-NEXT: bb.2.bb2:
395+
; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
396+
; GCN-NEXT: S_ENDPGM 0
297397
bb.0.bb:
298398
successors: %bb.1.bb1(0x40000000), %bb.2.bb2(0x40000000)
299399
liveins: $vgpr0, $sgpr0_sgpr1

0 commit comments

Comments
 (0)