Skip to content

Commit 618fa77

Browse files
committed
AMDGPU/GlobalISel: Select V_ADD3_U32/V_OR3_B32
The other 3-op patterns should, in theory, also be handled, but there is currently a bug in the inferred pattern complexity. I'm not sure what the error-handling strategy should be for potential constant bus violations. I think the correct strategy is to never produce mixed SGPR and VGPR operands in a typical VOP instruction, which trivially avoids such violations. However, it is still possible to have hand-written MIR (or erroneously transformed code) with mixed operands; when the SGPR operands are folded into the instruction, the restriction will be violated. We currently don't have any verifiers for register bank legality. For now, just ignore the restriction. It might be worth triggering a DAG fallback on verifier error.
1 parent 4faf71a commit 618fa77

10 files changed

+583
-109
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -570,9 +570,20 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
570570
}
571571

572572
return true;
573-
}]
574-
> {
573+
}]> {
575574
let PredicateCodeUsesOperands = 1;
575+
576+
// The divergence predicate is irrelevant in GlobalISel, as we have
577+
// proper register bank checks. We also force all VOP instruction
578+
// operands to VGPR, so we should not need to check the constant bus
579+
// restriction.
580+
//
581+
// FIXME: With unlucky SGPR operands, we could penalize code by
582+
// blocking folding SGPR->VGPR copies later.
583+
// FIXME: There's no register bank verifier
584+
// FIXME: Should add a way for the emitter to recognize this is a
585+
// trivially true predicate to eliminate the check.
586+
let GISelPredicateCode = [{return true;}];
576587
}
577588

578589
let SubtargetPredicate = isGFX9Plus in {
@@ -614,7 +625,7 @@ def V_SUB_I32_gfx9 : VOP3Inst <"v_sub_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32>>;
614625
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
615626
// This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
616627
(ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
617-
(inst i32:$src0, i32:$src1, i32:$src2)
628+
(inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
618629
>;
619630

620631
def : ThreeOp_i32_Pats<shl, add, V_LSHL_ADD_U32>;

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ body: |
2121
; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec
2222
; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_U32_]], %7, 0, implicit $exec
2323
; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_I32_e64 %8, [[COPY2]], 0, implicit $exec
24-
; GFX6: S_ENDPGM 0, implicit %9
24+
; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit %7, implicit %8, implicit %9
2525
; GFX9-LABEL: name: add_s32
2626
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
2727
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
@@ -30,7 +30,7 @@ body: |
3030
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec
3131
; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_U32_]], [[V_ADD_U32_e64_]], 0, implicit $exec
3232
; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec
33-
; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_2]]
33+
; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]]
3434
%0:sgpr(s32) = COPY $sgpr0
3535
%1:sgpr(s32) = COPY $sgpr1
3636
%2:vgpr(s32) = COPY $vgpr0
@@ -50,7 +50,7 @@ body: |
5050
; add vv
5151
%9:vgpr(s32) = G_ADD %8, %2
5252
53-
S_ENDPGM 0, implicit %9
53+
S_ENDPGM 0, implicit %6, implicit %7, implicit %8, implicit %9
5454
5555
...
5656

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
3+
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
4+
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
5+
6+
---
7+
8+
name: add_s32_sgpr_sgpr_sgpr
9+
legalized: true
10+
regBankSelected: true
11+
tracksRegLiveness: true
12+
13+
body: |
14+
bb.0:
15+
liveins: $sgpr0, $sgpr1, $sgpr2
16+
; GFX8-LABEL: name: add_s32_sgpr_sgpr_sgpr
17+
; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2
18+
; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
19+
; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
20+
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
21+
; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
22+
; GFX8: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[COPY2]], implicit-def $scc
23+
; GFX8: S_ENDPGM 0, implicit [[S_ADD_U32_1]]
24+
; GFX9-LABEL: name: add_s32_sgpr_sgpr_sgpr
25+
; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2
26+
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
27+
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
28+
; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
29+
; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
30+
; GFX9: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[COPY2]], implicit-def $scc
31+
; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_1]]
32+
; GFX10-LABEL: name: add_s32_sgpr_sgpr_sgpr
33+
; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2
34+
; GFX10: $vcc_hi = IMPLICIT_DEF
35+
; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
36+
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
37+
; GFX10: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
38+
; GFX10: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
39+
; GFX10: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[COPY2]], implicit-def $scc
40+
; GFX10: S_ENDPGM 0, implicit [[S_ADD_U32_1]]
41+
%0:sgpr(s32) = COPY $sgpr0
42+
%1:sgpr(s32) = COPY $sgpr1
43+
%2:sgpr(s32) = COPY $sgpr2
44+
%3:sgpr(s32) = G_ADD %0, %1
45+
%4:sgpr(s32) = G_ADD %3, %2
46+
S_ENDPGM 0, implicit %4
47+
...
48+
49+
---
50+
51+
name: add_s32_vgpr_vgpr_vgpr
52+
legalized: true
53+
regBankSelected: true
54+
tracksRegLiveness: true
55+
56+
body: |
57+
bb.0:
58+
liveins: $vgpr0, $vgpr1, $vgpr2
59+
; GFX8-LABEL: name: add_s32_vgpr_vgpr_vgpr
60+
; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2
61+
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
62+
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
63+
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
64+
; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
65+
; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_I32_e64 %3, [[COPY2]], 0, implicit $exec
66+
; GFX8: S_ENDPGM 0, implicit %4
67+
; GFX9-LABEL: name: add_s32_vgpr_vgpr_vgpr
68+
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
69+
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
70+
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
71+
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
72+
; GFX9: [[V_ADD3_U32_:%[0-9]+]]:vgpr_32 = V_ADD3_U32 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
73+
; GFX9: S_ENDPGM 0, implicit [[V_ADD3_U32_]]
74+
; GFX10-LABEL: name: add_s32_vgpr_vgpr_vgpr
75+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
76+
; GFX10: $vcc_hi = IMPLICIT_DEF
77+
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
78+
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
79+
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
80+
; GFX10: [[V_ADD3_U32_:%[0-9]+]]:vgpr_32 = V_ADD3_U32 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
81+
; GFX10: S_ENDPGM 0, implicit [[V_ADD3_U32_]]
82+
%0:vgpr(s32) = COPY $vgpr0
83+
%1:vgpr(s32) = COPY $vgpr1
84+
%2:vgpr(s32) = COPY $vgpr2
85+
%3:vgpr(s32) = G_ADD %0, %1
86+
%4:vgpr(s32) = G_ADD %3, %2
87+
S_ENDPGM 0, implicit %4
88+
...
89+
90+
---
91+
92+
name: add_s32_vgpr_vgpr_vgpr_multi_use
93+
legalized: true
94+
regBankSelected: true
95+
tracksRegLiveness: true
96+
97+
body: |
98+
bb.0:
99+
liveins: $vgpr0, $vgpr1, $vgpr2
100+
; GFX8-LABEL: name: add_s32_vgpr_vgpr_vgpr_multi_use
101+
; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2
102+
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
103+
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
104+
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
105+
; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
106+
; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_I32_e64 %3, [[COPY2]], 0, implicit $exec
107+
; GFX8: S_ENDPGM 0, implicit %4, implicit %3
108+
; GFX9-LABEL: name: add_s32_vgpr_vgpr_vgpr_multi_use
109+
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
110+
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
111+
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
112+
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
113+
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
114+
; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[COPY2]], 0, implicit $exec
115+
; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_]]
116+
; GFX10-LABEL: name: add_s32_vgpr_vgpr_vgpr_multi_use
117+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
118+
; GFX10: $vcc_hi = IMPLICIT_DEF
119+
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
120+
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
121+
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
122+
; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
123+
; GFX10: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[COPY2]], 0, implicit $exec
124+
; GFX10: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_]]
125+
%0:vgpr(s32) = COPY $vgpr0
126+
%1:vgpr(s32) = COPY $vgpr1
127+
%2:vgpr(s32) = COPY $vgpr2
128+
%3:vgpr(s32) = G_ADD %0, %1
129+
%4:vgpr(s32) = G_ADD %3, %2
130+
S_ENDPGM 0, implicit %4, implicit %3
131+
...
132+
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
3+
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
4+
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
5+
6+
---
7+
8+
name: or_s32_sgpr_sgpr_sgpr
9+
legalized: true
10+
regBankSelected: true
11+
tracksRegLiveness: true
12+
13+
body: |
14+
bb.0:
15+
liveins: $sgpr0, $sgpr1, $sgpr2
16+
; GFX8-LABEL: name: or_s32_sgpr_sgpr_sgpr
17+
; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2
18+
; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
19+
; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
20+
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
21+
; GFX8: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc
22+
; GFX8: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[COPY2]], implicit-def dead $scc
23+
; GFX8: S_ENDPGM 0, implicit [[S_OR_B32_1]]
24+
; GFX9-LABEL: name: or_s32_sgpr_sgpr_sgpr
25+
; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2
26+
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
27+
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
28+
; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
29+
; GFX9: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc
30+
; GFX9: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[COPY2]], implicit-def dead $scc
31+
; GFX9: S_ENDPGM 0, implicit [[S_OR_B32_1]]
32+
; GFX10-LABEL: name: or_s32_sgpr_sgpr_sgpr
33+
; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2
34+
; GFX10: $vcc_hi = IMPLICIT_DEF
35+
; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
36+
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
37+
; GFX10: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
38+
; GFX10: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc
39+
; GFX10: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[COPY2]], implicit-def dead $scc
40+
; GFX10: S_ENDPGM 0, implicit [[S_OR_B32_1]]
41+
%0:sgpr(s32) = COPY $sgpr0
42+
%1:sgpr(s32) = COPY $sgpr1
43+
%2:sgpr(s32) = COPY $sgpr2
44+
%3:sgpr(s32) = G_OR %0, %1
45+
%4:sgpr(s32) = G_OR %3, %2
46+
S_ENDPGM 0, implicit %4
47+
...
48+
49+
---
50+
51+
name: or_s32_vgpr_vgpr_vgpr
52+
legalized: true
53+
regBankSelected: true
54+
tracksRegLiveness: true
55+
56+
body: |
57+
bb.0:
58+
liveins: $vgpr0, $vgpr1, $vgpr2
59+
; GFX8-LABEL: name: or_s32_vgpr_vgpr_vgpr
60+
; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2
61+
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
62+
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
63+
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
64+
; GFX8: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec
65+
; GFX8: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[COPY2]], implicit $exec
66+
; GFX8: S_ENDPGM 0, implicit [[V_OR_B32_e64_1]]
67+
; GFX9-LABEL: name: or_s32_vgpr_vgpr_vgpr
68+
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
69+
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
70+
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
71+
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
72+
; GFX9: [[V_OR3_B32_:%[0-9]+]]:vgpr_32 = V_OR3_B32 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
73+
; GFX9: S_ENDPGM 0, implicit [[V_OR3_B32_]]
74+
; GFX10-LABEL: name: or_s32_vgpr_vgpr_vgpr
75+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
76+
; GFX10: $vcc_hi = IMPLICIT_DEF
77+
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
78+
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
79+
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
80+
; GFX10: [[V_OR3_B32_:%[0-9]+]]:vgpr_32 = V_OR3_B32 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
81+
; GFX10: S_ENDPGM 0, implicit [[V_OR3_B32_]]
82+
%0:vgpr(s32) = COPY $vgpr0
83+
%1:vgpr(s32) = COPY $vgpr1
84+
%2:vgpr(s32) = COPY $vgpr2
85+
%3:vgpr(s32) = G_OR %0, %1
86+
%4:vgpr(s32) = G_OR %3, %2
87+
S_ENDPGM 0, implicit %4
88+
...
89+
90+
---
91+
92+
name: or_s32_vgpr_vgpr_vgpr_multi_use
93+
legalized: true
94+
regBankSelected: true
95+
tracksRegLiveness: true
96+
97+
body: |
98+
bb.0:
99+
liveins: $vgpr0, $vgpr1, $vgpr2
100+
; GFX8-LABEL: name: or_s32_vgpr_vgpr_vgpr_multi_use
101+
; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2
102+
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
103+
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
104+
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
105+
; GFX8: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec
106+
; GFX8: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[COPY2]], implicit $exec
107+
; GFX8: S_ENDPGM 0, implicit [[V_OR_B32_e64_1]], implicit [[V_OR_B32_e64_]]
108+
; GFX9-LABEL: name: or_s32_vgpr_vgpr_vgpr_multi_use
109+
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
110+
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
111+
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
112+
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
113+
; GFX9: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec
114+
; GFX9: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[COPY2]], implicit $exec
115+
; GFX9: S_ENDPGM 0, implicit [[V_OR_B32_e64_1]], implicit [[V_OR_B32_e64_]]
116+
; GFX10-LABEL: name: or_s32_vgpr_vgpr_vgpr_multi_use
117+
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
118+
; GFX10: $vcc_hi = IMPLICIT_DEF
119+
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
120+
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
121+
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
122+
; GFX10: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec
123+
; GFX10: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[COPY2]], implicit $exec
124+
; GFX10: S_ENDPGM 0, implicit [[V_OR_B32_e64_1]], implicit [[V_OR_B32_e64_]]
125+
%0:vgpr(s32) = COPY $vgpr0
126+
%1:vgpr(s32) = COPY $vgpr1
127+
%2:vgpr(s32) = COPY $vgpr2
128+
%3:vgpr(s32) = G_OR %0, %1
129+
%4:vgpr(s32) = G_OR %3, %2
130+
S_ENDPGM 0, implicit %4, implicit %3
131+
...
132+

0 commit comments

Comments
 (0)