Skip to content

Commit 26a7781

Browse files
Author: git apple-llvm automerger (committed)
Commit message: Merge commit 'f8d3bdf6a272' from llvm.org/main into next
2 parents: 8cd17bb + f8d3bdf; commit 26a7781

File tree

3 files changed: 72 additions (+72) and 5 deletions (-5).

llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -634,11 +634,7 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
634634
switch (MI.getOpcode()) {
635635
default:
636636
continue;
637-
case AMDGPU::COPY:
638-
case AMDGPU::WQM:
639-
case AMDGPU::STRICT_WQM:
640-
case AMDGPU::SOFT_WQM:
641-
case AMDGPU::STRICT_WWM: {
637+
case AMDGPU::COPY: {
642638
const TargetRegisterClass *SrcRC, *DstRC;
643639
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);
644640

@@ -662,6 +658,10 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
662658

663659
break;
664660
}
661+
case AMDGPU::WQM:
662+
case AMDGPU::STRICT_WQM:
663+
case AMDGPU::SOFT_WQM:
664+
case AMDGPU::STRICT_WWM:
665665
case AMDGPU::INSERT_SUBREG:
666666
case AMDGPU::PHI:
667667
case AMDGPU::REG_SEQUENCE: {
(New file: an llc FileCheck test written in LLVM IR; its path was not captured in this extract.)

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s
3+
4+
define amdgpu_gs i32 @main() {
5+
; CHECK-LABEL: main:
6+
; CHECK: ; %bb.0: ; %bb
7+
; CHECK-NEXT: s_bitcmp1_b32 0, 0
8+
; CHECK-NEXT: s_mov_b32 s0, 0
9+
; CHECK-NEXT: s_cselect_b32 s1, -1, 0
10+
; CHECK-NEXT: s_or_saveexec_b32 s2, -1
11+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
12+
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
13+
; CHECK-NEXT: v_readfirstlane_b32 s1, v0
14+
; CHECK-NEXT: s_mov_b32 exec_lo, s2
15+
; CHECK-NEXT: s_or_b32 s0, s0, s1
16+
; CHECK-NEXT: s_wait_alu 0xfffe
17+
; CHECK-NEXT: s_bitcmp1_b32 s0, 0
18+
; CHECK-NEXT: s_cselect_b32 s0, -1, 0
19+
; CHECK-NEXT: s_wait_alu 0xfffe
20+
; CHECK-NEXT: s_xor_b32 s0, s0, -1
21+
; CHECK-NEXT: s_wait_alu 0xfffe
22+
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
23+
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
24+
; CHECK-NEXT: v_readfirstlane_b32 s0, v1
25+
; CHECK-NEXT: s_wait_alu 0xf1ff
26+
; CHECK-NEXT: ; return to shader part epilog
27+
bb:
28+
%i = call i1 @llvm.amdgcn.readfirstlane.i1(i1 false)
29+
br label %bb1
30+
31+
bb1:
32+
%i2 = zext i1 %i to i32
33+
%i3 = call i32 @llvm.amdgcn.wwm.i32(i32 0)
34+
%i4 = call i32 @llvm.amdgcn.wwm.i32(i32 %i2)
35+
%i5 = trunc i32 %i4 to i1
36+
%i6 = trunc i32 %i3 to i1
37+
%i7 = or i1 %i6, %i5
38+
%i8 = select i1 %i7, i32 0, i32 1
39+
ret i32 %i8
40+
}
(New file: a MIR test for the si-fix-sgpr-copies pass; its path was not captured in this extract.)

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=si-fix-sgpr-copies %s -o - | FileCheck %s
3+
4+
---
5+
name: main
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
; CHECK-LABEL: name: main
10+
; CHECK: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
11+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
12+
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[DEF1]], implicit $exec
13+
; CHECK-NEXT: early-clobber %2:sreg_32 = STRICT_WWM killed undef [[V_READFIRSTLANE_B32_]], implicit $exec
14+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed undef [[DEF]], killed undef %2, implicit-def dead $scc
15+
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed undef [[S_OR_B32_]], implicit-def dead $scc
16+
; CHECK-NEXT: S_CMP_EQ_U32 killed undef [[S_AND_B32_]], 1, implicit-def $scc
17+
; CHECK-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 killed undef [[S_AND_B32_]], killed undef [[S_AND_B32_]], implicit-def dead $scc
18+
; CHECK-NEXT: SI_RETURN_TO_EPILOG undef $sgpr0
19+
%0:sreg_32 = IMPLICIT_DEF
20+
%1:vgpr_32 = IMPLICIT_DEF
21+
early-clobber %2:sreg_32 = STRICT_WWM killed undef %1, implicit $exec
22+
%3:sreg_32 = S_OR_B32 killed undef %0, killed undef %2, implicit-def dead $scc
23+
%4:sreg_32 = S_AND_B32 1, killed undef %3, implicit-def dead $scc
24+
S_CMP_EQ_U32 killed undef %4, 1, implicit-def $scc
25+
%5:sreg_32_xm0_xexec = S_XOR_B32 killed undef %4, killed undef %4, implicit-def dead $scc
26+
SI_RETURN_TO_EPILOG undef $sgpr0
27+
...

0 commit comments

Comments
 (0)