Skip to content

Commit 50ca316

Browse files
Update affected test
1 parent e604cd6 commit 50ca316

14 files changed

+804
-658
lines changed

llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,8 @@ define <2 x i64> @v_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
148148
define amdgpu_ps <2 x i64> @s_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
149149
; GFX9-LABEL: s_add_v2i64_splat_const_low_bits_known0_0:
150150
; GFX9: ; %bb.0:
151-
; GFX9-NEXT: s_add_i32 s1, s1, 1
152151
; GFX9-NEXT: s_add_i32 s3, s3, 1
152+
; GFX9-NEXT: s_add_i32 s1, s1, 1
153153
; GFX9-NEXT: ; return to shader part epilog
154154
%add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
155155
ret <2 x i64> %add
@@ -158,8 +158,8 @@ define amdgpu_ps <2 x i64> @s_add_v2i64_splat_const_low_bits_known0_0(<2 x i64>
158158
define amdgpu_ps <2 x i64> @s_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
159159
; GFX9-LABEL: s_add_v2i64_nonsplat_const_low_bits_known0_0:
160160
; GFX9: ; %bb.0:
161-
; GFX9-NEXT: s_add_i32 s1, s1, 1
162161
; GFX9-NEXT: s_add_i32 s3, s3, 2
162+
; GFX9-NEXT: s_add_i32 s1, s1, 1
163163
; GFX9-NEXT: ; return to shader part epilog
164164
%add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
165165
ret <2 x i64> %add

llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,11 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_offset_rtn(double %val, <4 x i32
110110
; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
111111
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
112112
; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
113+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY8]], implicit $exec
113114
; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
114-
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY8]]
115-
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY9]]
115+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
116+
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
117+
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
116118
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
117119
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
118120
ret double %ret
@@ -136,9 +138,11 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_offen_rtn(double %val, <4 x i32>
136138
; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
137139
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
138140
; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
141+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
139142
; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
140-
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY9]]
141-
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY10]]
143+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
144+
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
145+
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
142146
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
143147
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
144148
ret double %ret
@@ -162,9 +166,11 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_idxen_rtn(double %val, <4 x i32>
162166
; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
163167
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
164168
; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
169+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
165170
; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
166-
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY9]]
167-
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY10]]
171+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
172+
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
173+
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
168174
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
169175
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
170176
ret double %ret
@@ -190,9 +196,11 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32
190196
; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
191197
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
192198
; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
199+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
193200
; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
194-
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY10]]
195-
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY11]]
201+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY11]], implicit $exec
202+
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
203+
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
196204
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
197205
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
198206
ret double %ret
@@ -334,9 +342,11 @@ define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offset_rtn(double %val, ptr
334342
; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
335343
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
336344
; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
345+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec
337346
; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
338-
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY12]]
339-
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY13]]
347+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
348+
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
349+
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
340350
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
341351
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
342352
ret double %ret
@@ -366,9 +376,11 @@ define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offen_rtn(double %val, ptr a
366376
; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
367377
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
368378
; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
379+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
369380
; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
370-
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY13]]
371-
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY14]]
381+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
382+
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
383+
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
372384
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
373385
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
374386
ret double %ret
@@ -398,9 +410,11 @@ define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_idxen_rtn(double %val, ptr a
398410
; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
399411
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
400412
; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
413+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
401414
; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
402-
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY13]]
403-
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY14]]
415+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
416+
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
417+
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
404418
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
405419
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
406420
ret double %ret
@@ -432,9 +446,11 @@ define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr
432446
; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
433447
; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
434448
; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
449+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
435450
; GFX90A_GFX942-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
436-
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY14]]
437-
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY15]]
451+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY15]], implicit $exec
452+
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
453+
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
438454
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
439455
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
440456
ret double %ret

llvm/test/CodeGen/AMDGPU/constrained-shift.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -168,26 +168,26 @@ define <4 x i32> @csh_v4i32(<4 x i32> %a, <4 x i32> %b) {
168168
define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b) {
169169
; CHECK-LABEL: s_csh_v4i32:
170170
; CHECK: ; %bb.0:
171-
; CHECK-NEXT: s_lshl_b32 s8, s0, s4
172-
; CHECK-NEXT: s_lshl_b32 s9, s1, s5
173-
; CHECK-NEXT: s_lshl_b32 s10, s2, s6
174-
; CHECK-NEXT: s_lshl_b32 s11, s3, s7
175-
; CHECK-NEXT: s_lshr_b32 s12, s0, s4
176-
; CHECK-NEXT: s_lshr_b32 s13, s1, s5
177-
; CHECK-NEXT: s_lshr_b32 s14, s2, s6
178-
; CHECK-NEXT: s_lshr_b32 s15, s3, s7
179-
; CHECK-NEXT: s_ashr_i32 s3, s3, s7
180-
; CHECK-NEXT: s_ashr_i32 s2, s2, s6
181-
; CHECK-NEXT: s_ashr_i32 s1, s1, s5
171+
; CHECK-NEXT: s_lshl_b32 s8, s3, s7
172+
; CHECK-NEXT: s_lshl_b32 s9, s2, s6
173+
; CHECK-NEXT: s_lshl_b32 s10, s1, s5
174+
; CHECK-NEXT: s_lshl_b32 s11, s0, s4
175+
; CHECK-NEXT: s_lshr_b32 s12, s3, s7
176+
; CHECK-NEXT: s_lshr_b32 s13, s2, s6
177+
; CHECK-NEXT: s_lshr_b32 s14, s1, s5
178+
; CHECK-NEXT: s_lshr_b32 s15, s0, s4
182179
; CHECK-NEXT: s_ashr_i32 s0, s0, s4
180+
; CHECK-NEXT: s_ashr_i32 s1, s1, s5
181+
; CHECK-NEXT: s_ashr_i32 s2, s2, s6
182+
; CHECK-NEXT: s_ashr_i32 s3, s3, s7
183183
; CHECK-NEXT: s_add_i32 s4, s11, s15
184184
; CHECK-NEXT: s_add_i32 s5, s10, s14
185185
; CHECK-NEXT: s_add_i32 s6, s9, s13
186186
; CHECK-NEXT: s_add_i32 s7, s8, s12
187-
; CHECK-NEXT: s_add_i32 s0, s7, s0
188-
; CHECK-NEXT: s_add_i32 s1, s6, s1
189-
; CHECK-NEXT: s_add_i32 s2, s5, s2
190-
; CHECK-NEXT: s_add_i32 s3, s4, s3
187+
; CHECK-NEXT: s_add_i32 s3, s7, s3
188+
; CHECK-NEXT: s_add_i32 s2, s6, s2
189+
; CHECK-NEXT: s_add_i32 s1, s5, s1
190+
; CHECK-NEXT: s_add_i32 s0, s4, s0
191191
; CHECK-NEXT: ; return to shader part epilog
192192
;
193193
; GISEL-LABEL: s_csh_v4i32:

llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@ define amdgpu_ps i32 @s_or_i32_disjoint(i32 inreg %a, i32 inreg %b) {
1010
; CHECK-NEXT: {{ $}}
1111
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
1212
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
13-
; CHECK-NEXT: %3:sreg_32 = disjoint S_OR_B32 [[COPY1]], [[COPY]], implicit-def dead $scc
14-
; CHECK-NEXT: $sgpr0 = COPY %3
13+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = disjoint S_OR_B32 [[COPY1]], [[COPY]], implicit-def dead $scc
14+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]]
15+
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY2]], implicit $exec
16+
; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1517
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0
1618
%result = or disjoint i32 %a, %b
1719
ret i32 %result
@@ -26,10 +28,14 @@ define amdgpu_ps <2 x i32> @s_or_v2i32_disjoint(<2 x i32> inreg %a, <2 x i32> in
2628
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
2729
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
2830
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
29-
; CHECK-NEXT: %5:sreg_32 = disjoint S_OR_B32 [[COPY3]], [[COPY1]], implicit-def dead $scc
30-
; CHECK-NEXT: %6:sreg_32 = disjoint S_OR_B32 [[COPY2]], [[COPY]], implicit-def dead $scc
31-
; CHECK-NEXT: $sgpr0 = COPY %5
32-
; CHECK-NEXT: $sgpr1 = COPY %6
31+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = disjoint S_OR_B32 [[COPY2]], [[COPY]], implicit-def dead $scc
32+
; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = disjoint S_OR_B32 [[COPY3]], [[COPY1]], implicit-def dead $scc
33+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_1]]
34+
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY4]], implicit $exec
35+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]]
36+
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY5]], implicit $exec
37+
; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
38+
; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
3339
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
3440
%result = or disjoint <2 x i32> %a, %b
3541
ret <2 x i32> %result
@@ -42,8 +48,8 @@ define i32 @v_or_i32_disjoint(i32 %a, i32 %b) {
4248
; CHECK-NEXT: {{ $}}
4349
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4450
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
45-
; CHECK-NEXT: %10:vgpr_32 = disjoint V_OR_B32_e64 [[COPY1]], [[COPY]], implicit $exec
46-
; CHECK-NEXT: $vgpr0 = COPY %10
51+
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 [[COPY1]], [[COPY]], implicit $exec
52+
; CHECK-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
4753
; CHECK-NEXT: SI_RETURN implicit $vgpr0
4854
%result = or disjoint i32 %a, %b
4955
ret i32 %result
@@ -58,10 +64,10 @@ define <2 x i32> @v_or_v2i32_disjoint(<2 x i32> %a, <2 x i32> %b) {
5864
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
5965
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
6066
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
61-
; CHECK-NEXT: %12:vgpr_32 = disjoint V_OR_B32_e64 [[COPY3]], [[COPY1]], implicit $exec
62-
; CHECK-NEXT: %13:vgpr_32 = disjoint V_OR_B32_e64 [[COPY2]], [[COPY]], implicit $exec
63-
; CHECK-NEXT: $vgpr0 = COPY %12
64-
; CHECK-NEXT: $vgpr1 = COPY %13
67+
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 [[COPY3]], [[COPY1]], implicit $exec
68+
; CHECK-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 [[COPY2]], [[COPY]], implicit $exec
69+
; CHECK-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
70+
; CHECK-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]]
6571
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
6672
%result = or disjoint <2 x i32> %a, %b
6773
ret <2 x i32> %result
@@ -78,11 +84,15 @@ define amdgpu_ps i64 @s_or_i64_disjoint(i64 inreg %a, i64 inreg %b) {
7884
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
7985
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
8086
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
81-
; CHECK-NEXT: %7:sreg_64 = disjoint S_OR_B64 killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], implicit-def dead $scc
82-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %7.sub1
83-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY %7.sub0
84-
; CHECK-NEXT: $sgpr0 = COPY [[COPY5]]
85-
; CHECK-NEXT: $sgpr1 = COPY [[COPY4]]
87+
; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = disjoint S_OR_B64 killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], implicit-def dead $scc
88+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub1
89+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub0
90+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
91+
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY6]], implicit $exec
92+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
93+
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY7]], implicit $exec
94+
; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
95+
; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
8696
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
8797
%result = or disjoint i64 %a, %b
8898
ret i64 %result

0 commit comments

Comments
 (0)