Skip to content

Commit cc397c2

Browse files
arsenm authored and memfrob committed
AMDGPU: Support commuting register and global operand
1 parent 1495c33 commit cc397c2

File tree

3 files changed

+49
-4
lines changed

3 files changed

+49
-4
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1857,7 +1857,10 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
18571857
RegOp.ChangeToImmediate(NonRegOp.getImm());
18581858
else if (NonRegOp.isFI())
18591859
RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
1860-
else
1860+
else if (NonRegOp.isGlobal()) {
1861+
RegOp.ChangeToGA(NonRegOp.getGlobal(), NonRegOp.getOffset(),
1862+
NonRegOp.getTargetFlags());
1863+
} else
18611864
return nullptr;
18621865

18631866
// Make sure we don't reinterpret a subreg index in the target flags.

llvm/test/CodeGen/AMDGPU/lds-relocs.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@
3535
; GCN: v_mov_b32_e32 v1, lds.external@abs32@lo ; encoding: [0xff,0x02,0x02,0x7e,A,A,A,A]
3636
; GCN-NEXT: ; fixup A - offset: 4, value: lds.external@abs32@lo, kind: FK_Data_4{{$}}
3737
;
38-
; GCN: s_add_i32 s0, lds.defined@abs32@lo, s0 ; encoding: [0xff,0x00,0x00,0x81,A,A,A,A]
38+
; GCN: s_add_i32 s0, s0, lds.defined@abs32@lo ; encoding: [0x00,0xff,0x00,0x81,A,A,A,A]
3939
; GCN-NEXT: ; fixup A - offset: 4, value: lds.defined@abs32@lo, kind: FK_Data_4{{$}}
4040
;
4141
; GCN: .globl lds.external

llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir

Lines changed: 44 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,11 @@
55
define void @commute_instruction_subreg_target_flag() { ret void }
66
define void @commute_target_flag_frame_index() { ret void }
77
define void @commute_target_flag_global() { ret void }
8+
define void @commute_target_flag_global_offset() { ret void }
9+
define void @commute_target_flag_global_offset_mismatch() { ret void }
10+
811
declare void @func()
12+
@gv = external addrspace(1) global i32
913

1014
...
1115

@@ -64,11 +68,49 @@ body: |
6468
; CHECK: liveins: $sgpr0_sgpr1
6569
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
6670
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc
67-
; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, [[COPY]].sub0, implicit-def dead $scc
68-
; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_1]]
71+
; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_]]
6972
%0:sreg_64 = COPY $sgpr0_sgpr1
7073
%1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc
7174
%2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, %0.sub0, implicit-def dead $scc
7275
S_ENDPGM 0, implicit %1, implicit %2
7376
7477
...
78+
79+
---
80+
name: commute_target_flag_global_offset
81+
tracksRegLiveness: true
82+
body: |
83+
bb.0:
84+
liveins: $sgpr0_sgpr1
85+
86+
; CHECK-LABEL: name: commute_target_flag_global_offset
87+
; CHECK: liveins: $sgpr0_sgpr1
88+
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
89+
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
90+
; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_]]
91+
%0:sreg_64 = COPY $sgpr0_sgpr1
92+
%1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
93+
%2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 4, %0.sub0, implicit-def dead $scc
94+
S_ENDPGM 0, implicit %1, implicit %2
95+
96+
...
97+
98+
---
99+
name: commute_target_flag_global_offset_mismatch
100+
tracksRegLiveness: true
101+
body: |
102+
bb.0:
103+
liveins: $sgpr0_sgpr1
104+
105+
; CHECK-LABEL: name: commute_target_flag_global_offset_mismatch
106+
; CHECK: liveins: $sgpr0_sgpr1
107+
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
108+
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
109+
; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 8, [[COPY]].sub0, implicit-def dead $scc
110+
; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_1]]
111+
%0:sreg_64 = COPY $sgpr0_sgpr1
112+
%1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
113+
%2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 8, %0.sub0, implicit-def dead $scc
114+
S_ENDPGM 0, implicit %1, implicit %2
115+
116+
...

0 commit comments

Comments
 (0)