Skip to content

Commit 1495c33

Browse files
arsenm authored and memfrob committed
AMDGPU: Fix handling of target flags when commuting instruction
If the original register operand had a subregister, it wasn't getting cleared. This resulted in the subreg index being reinterpreted as unrecognized target flags, which produced unparseable MIR.
1 parent d5c2ba0 commit 1495c33

File tree

2 files changed

+77
-0
lines changed

2 files changed

+77
-0
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1860,6 +1860,9 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
18601860
else
18611861
return nullptr;
18621862

1863+
// Make sure we don't reinterpret a subreg index in the target flags.
1864+
RegOp.setTargetFlags(NonRegOp.getTargetFlags());
1865+
18631866
NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
18641867
NonRegOp.setSubReg(SubReg);
18651868

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=machine-cse -o - %s | FileCheck %s
3+
4+
--- |
5+
define void @commute_instruction_subreg_target_flag() { ret void }
6+
define void @commute_target_flag_frame_index() { ret void }
7+
define void @commute_target_flag_global() { ret void }
8+
declare void @func()
9+
10+
...
11+
12+
# Make sure the subreg index is cleared when commuting a register and immediate.
13+
14+
---
15+
name: commute_instruction_subreg_target_flag
16+
tracksRegLiveness: true
17+
body: |
18+
bb.0:
19+
liveins: $vgpr0_vgpr1
20+
21+
; CHECK-LABEL: name: commute_instruction_subreg_target_flag
22+
; CHECK: liveins: $vgpr0_vgpr1
23+
; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
24+
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]].sub1, 64, 0, implicit $exec
25+
; CHECK: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_]]
26+
%0:vreg_64 = COPY $vgpr0_vgpr1
27+
%1:vgpr_32 = V_ADD_U32_e64 %0.sub1, 64, 0, implicit $exec
28+
%2:vgpr_32 = V_ADD_U32_e64 64, %0.sub1, 0, implicit $exec
29+
S_ENDPGM 0, implicit %1, implicit %2
30+
31+
...
32+
33+
# FIXME: Why doesn't this CSE?
34+
---
35+
name: commute_target_flag_frame_index
36+
tracksRegLiveness: true
37+
stack:
38+
- { id: 0, type: default, offset: 0, size: 4, alignment: 4 }
39+
body: |
40+
bb.0:
41+
liveins: $vgpr0_vgpr1
42+
43+
; CHECK-LABEL: name: commute_target_flag_frame_index
44+
; CHECK: liveins: $vgpr0_vgpr1
45+
; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
46+
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, [[COPY]].sub0, 0, implicit $exec
47+
; CHECK: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_]]
48+
%0:vreg_64 = COPY $vgpr0_vgpr1
49+
%1:vgpr_32 = V_ADD_U32_e64 %0.sub0, %stack.0, 0, implicit $exec
50+
%2:vgpr_32 = V_ADD_U32_e64 %stack.0, %0.sub0, 0, implicit $exec
51+
S_ENDPGM 0, implicit %1, implicit %2
52+
53+
...
54+
55+
# FIXME: Handle commuting global variables
56+
---
57+
name: commute_target_flag_global
58+
tracksRegLiveness: true
59+
body: |
60+
bb.0:
61+
liveins: $sgpr0_sgpr1
62+
63+
; CHECK-LABEL: name: commute_target_flag_global
64+
; CHECK: liveins: $sgpr0_sgpr1
65+
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
66+
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc
67+
; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, [[COPY]].sub0, implicit-def dead $scc
68+
; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_1]]
69+
%0:sreg_64 = COPY $sgpr0_sgpr1
70+
%1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc
71+
%2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, %0.sub0, implicit-def dead $scc
72+
S_ENDPGM 0, implicit %1, implicit %2
73+
74+
...

0 commit comments

Comments
 (0)