Skip to content

Commit e4464bf

Browse files
committed
AMDGPU/GlobalISel: Select scalar v2s16 G_BUILD_VECTOR
1 parent bc763c4 commit e4464bf

File tree

5 files changed

+280
-25
lines changed

5 files changed

+280
-25
lines changed

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
100100
def : GINodeEquiv<G_CTPOP, ctpop>;
101101
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
102102
def : GINodeEquiv<G_CONCAT_VECTORS, concat_vectors>;
103+
def : GINodeEquiv<G_BUILD_VECTOR, build_vector>;
103104
def : GINodeEquiv<G_FCEIL, fceil>;
104105
def : GINodeEquiv<G_FCOS, fcos>;
105106
def : GINodeEquiv<G_FSIN, fsin>;

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
484484

485485
const unsigned SrcSize = SrcTy.getSizeInBits();
486486
if (SrcSize < 32)
487-
return false;
487+
return selectImpl(MI, *CoverageInfo);
488488

489489
const DebugLoc &DL = MI.getDebugLoc();
490490
const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -158,20 +158,30 @@ def brtarget : Operand<OtherVT>;
158158
class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
159159
(ops node:$src0),
160160
(op $src0),
161-
[{ return N->hasOneUse(); }]
162-
>;
161+
[{ return N->hasOneUse(); }]> {
162+
163+
let GISelPredicateCode = [{
164+
return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
165+
}];
166+
}
163167

164168
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
165169
(ops node:$src0, node:$src1),
166170
(op $src0, $src1),
167-
[{ return N->hasOneUse(); }]
168-
>;
171+
[{ return N->hasOneUse(); }]> {
172+
let GISelPredicateCode = [{
173+
return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
174+
}];
175+
}
169176

170177
class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
171178
(ops node:$src0, node:$src1, node:$src2),
172179
(op $src0, $src1, $src2),
173-
[{ return N->hasOneUse(); }]
174-
>;
180+
[{ return N->hasOneUse(); }]> {
181+
let GISelPredicateCode = [{
182+
return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
183+
}];
184+
}
175185

176186
let Properties = [SDNPCommutative, SDNPAssociative] in {
177187
def smax_oneuse : HasOneUseBinOp<smax>;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1793,54 +1793,59 @@ def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
17931793
// COPY is workaround tablegen bug from multiple outputs
17941794
// from S_LSHL_B32's multiple outputs from implicit scc def.
17951795
def : GCNPat <
1796-
(v2i16 (build_vector (i16 0), i16:$src1)),
1797-
(v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16))))
1796+
(v2i16 (build_vector (i16 0), (i16 SReg_32:$src1))),
1797+
(S_LSHL_B32 SReg_32:$src1, (i16 16))
17981798
>;
17991799

18001800
def : GCNPat <
1801-
(v2i16 (build_vector i16:$src0, (i16 undef))),
1802-
(v2i16 (COPY $src0))
1801+
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
1802+
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
1803+
>;
1804+
1805+
def : GCNPat <
1806+
(v2i16 (build_vector (i16 VGPR_32:$src0), (i16 undef))),
1807+
(COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
18031808
>;
18041809

18051810
def : GCNPat <
18061811
(v2f16 (build_vector f16:$src0, (f16 undef))),
1807-
(v2f16 (COPY $src0))
1812+
(COPY $src0)
18081813
>;
18091814

18101815
def : GCNPat <
1811-
(v2i16 (build_vector (i16 undef), i16:$src1)),
1812-
(v2i16 (COPY (S_LSHL_B32 $src1, (i32 16))))
1816+
(v2i16 (build_vector (i16 undef), (i16 SReg_32:$src1))),
1817+
(S_LSHL_B32 SReg_32:$src1, (i32 16))
18131818
>;
18141819

18151820
def : GCNPat <
1816-
(v2f16 (build_vector (f16 undef), f16:$src1)),
1817-
(v2f16 (COPY (S_LSHL_B32 $src1, (i32 16))))
1821+
(v2f16 (build_vector (f16 undef), (f16 SReg_32:$src1))),
1822+
(S_LSHL_B32 SReg_32:$src1, (i32 16))
18181823
>;
18191824

18201825
let SubtargetPredicate = HasVOP3PInsts in {
18211826
def : GCNPat <
1822-
(v2i16 (build_vector i16:$src0, i16:$src1)),
1823-
(v2i16 (S_PACK_LL_B32_B16 $src0, $src1))
1827+
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 SReg_32:$src1))),
1828+
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
18241829
>;
18251830

18261831
// With multiple uses of the shift, this will duplicate the shift and
18271832
// increase register pressure.
18281833
def : GCNPat <
1829-
(v2i16 (build_vector i16:$src0, (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
1830-
(v2i16 (S_PACK_LH_B32_B16 i16:$src0, i32:$src1))
1834+
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
1835+
(v2i16 (S_PACK_LH_B32_B16 SReg_32:$src0, SReg_32:$src1))
18311836
>;
18321837

18331838

18341839
def : GCNPat <
1835-
(v2i16 (build_vector (i16 (trunc (srl_oneuse i32:$src0, (i32 16)))),
1836-
(i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
1837-
(v2i16 (S_PACK_HH_B32_B16 $src0, $src1))
1840+
(v2i16 (build_vector (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))),
1841+
(i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
1842+
(S_PACK_HH_B32_B16 SReg_32:$src0, SReg_32:$src1)
18381843
>;
18391844

18401845
// TODO: Should source modifiers be matched to v_pack_b32_f16?
18411846
def : GCNPat <
1842-
(v2f16 (build_vector f16:$src0, f16:$src1)),
1843-
(v2f16 (S_PACK_LL_B32_B16 $src0, $src1))
1847+
(v2f16 (build_vector (f16 SReg_32:$src0), (f16 SReg_32:$src1))),
1848+
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
18441849
>;
18451850

18461851
} // End SubtargetPredicate = HasVOP3PInsts
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
3+
4+
---
5+
name: test_build_vector_s_v2s16_s_s16_s_s16
6+
legalized: true
7+
regBankSelected: true
8+
tracksRegLiveness: true
9+
10+
body: |
11+
bb.0:
12+
liveins: $sgpr0, $sgpr1
13+
14+
; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_s16
15+
; GFX9: liveins: $sgpr0, $sgpr1
16+
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
17+
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
18+
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY1]]
19+
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
20+
%0:sgpr(s32) = COPY $sgpr0
21+
%1:sgpr(s32) = COPY $sgpr1
22+
23+
%2:sgpr(s16) = G_TRUNC %0
24+
%3:sgpr(s16) = G_TRUNC %1
25+
26+
%4:sgpr(<2 x s16>) = G_BUILD_VECTOR %2, %3
27+
S_ENDPGM 0, implicit %4
28+
...
29+
30+
---
31+
name: test_build_vector_s_pack_lh
32+
legalized: true
33+
regBankSelected: true
34+
tracksRegLiveness: true
35+
36+
body: |
37+
bb.0:
38+
liveins: $sgpr0, $sgpr1
39+
40+
; GFX9-LABEL: name: test_build_vector_s_pack_lh
41+
; GFX9: liveins: $sgpr0, $sgpr1
42+
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
43+
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
44+
; GFX9: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[COPY]], [[COPY1]]
45+
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LH_B32_B16_]]
46+
%0:sgpr(s32) = COPY $sgpr0
47+
%1:sgpr(s32) = COPY $sgpr1
48+
49+
%2:sgpr(s32) = G_CONSTANT i32 16
50+
%3:sgpr(s32) = G_LSHR %1, %2
51+
52+
%4:sgpr(s16) = G_TRUNC %0
53+
%5:sgpr(s16) = G_TRUNC %3
54+
55+
%6:sgpr(<2 x s16>) = G_BUILD_VECTOR %4, %5
56+
S_ENDPGM 0, implicit %6
57+
...
58+
59+
# There is no s_pack_hl_b32
60+
---
61+
name: test_build_vector_s_pack_lh_swapped
62+
legalized: true
63+
regBankSelected: true
64+
tracksRegLiveness: true
65+
66+
body: |
67+
bb.0:
68+
liveins: $sgpr0, $sgpr1
69+
70+
; GFX9-LABEL: name: test_build_vector_s_pack_lh_swapped
71+
; GFX9: liveins: $sgpr0, $sgpr1
72+
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
73+
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
74+
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
75+
; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
76+
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]]
77+
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
78+
%0:sgpr(s32) = COPY $sgpr0
79+
%1:sgpr(s32) = COPY $sgpr1
80+
81+
%2:sgpr(s32) = G_CONSTANT i32 16
82+
%3:sgpr(s32) = G_LSHR %1, %2
83+
84+
%4:sgpr(s16) = G_TRUNC %0
85+
%5:sgpr(s16) = G_TRUNC %3
86+
87+
%6:sgpr(<2 x s16>) = G_BUILD_VECTOR %5, %4
88+
S_ENDPGM 0, implicit %6
89+
...
90+
91+
---
92+
name: test_build_vector_s_pack_hh
93+
legalized: true
94+
regBankSelected: true
95+
tracksRegLiveness: true
96+
97+
body: |
98+
bb.0:
99+
liveins: $sgpr0, $sgpr1
100+
101+
; GFX9-LABEL: name: test_build_vector_s_pack_hh
102+
; GFX9: liveins: $sgpr0, $sgpr1
103+
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
104+
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
105+
; GFX9: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY1]]
106+
; GFX9: S_ENDPGM 0, implicit [[S_PACK_HH_B32_B16_]]
107+
%0:sgpr(s32) = COPY $sgpr0
108+
%1:sgpr(s32) = COPY $sgpr1
109+
110+
%2:sgpr(s32) = G_CONSTANT i32 16
111+
%3:sgpr(s32) = G_LSHR %0, %2
112+
%4:sgpr(s32) = G_LSHR %1, %2
113+
114+
%5:sgpr(s16) = G_TRUNC %3
115+
%6:sgpr(s16) = G_TRUNC %4
116+
117+
%7:sgpr(<2 x s16>) = G_BUILD_VECTOR %5, %6
118+
S_ENDPGM 0, implicit %7
119+
...
120+
121+
# TODO: Should this use an AND instead?
122+
---
123+
name: test_build_vector_s_v2s16_s_s16_s_0_s16
124+
legalized: true
125+
regBankSelected: true
126+
tracksRegLiveness: true
127+
128+
body: |
129+
bb.0:
130+
liveins: $sgpr0
131+
132+
; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_0_s16
133+
; GFX9: liveins: $sgpr0
134+
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
135+
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
136+
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]]
137+
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
138+
%0:sgpr(s32) = COPY $sgpr0
139+
140+
%1:sgpr(s16) = G_TRUNC %0
141+
%2:sgpr(s16) = G_CONSTANT i16 0
142+
143+
%3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
144+
S_ENDPGM 0, implicit %3
145+
...
146+
147+
---
148+
name: test_build_vector_s_v2s16_s_0_s16_s_s16
149+
legalized: true
150+
regBankSelected: true
151+
tracksRegLiveness: true
152+
153+
body: |
154+
bb.0:
155+
liveins: $sgpr0
156+
157+
; GFX9-LABEL: name: test_build_vector_s_v2s16_s_0_s16_s_s16
158+
; GFX9: liveins: $sgpr0
159+
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
160+
; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
161+
; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
162+
%0:sgpr(s32) = COPY $sgpr0
163+
164+
%1:sgpr(s16) = G_CONSTANT i16 0
165+
%2:sgpr(s16) = G_TRUNC %0
166+
167+
%3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
168+
S_ENDPGM 0, implicit %3
169+
...
170+
171+
---
172+
name: test_build_vector_v_v2s16_v_s16_s_undef_s16
173+
legalized: true
174+
regBankSelected: true
175+
tracksRegLiveness: true
176+
177+
body: |
178+
bb.0:
179+
liveins: $vgpr0
180+
181+
; GFX9-LABEL: name: test_build_vector_v_v2s16_v_s16_s_undef_s16
182+
; GFX9: liveins: $vgpr0
183+
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
184+
; GFX9: S_ENDPGM 0, implicit [[COPY]]
185+
%0:vgpr(s32) = COPY $vgpr0
186+
187+
%1:vgpr(s16) = G_TRUNC %0
188+
%2:sgpr(s16) = G_IMPLICIT_DEF
189+
190+
%3:vgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
191+
S_ENDPGM 0, implicit %3
192+
...
193+
194+
---
195+
name: test_build_vector_s_v2s16_s_s16_s_undef_s16
196+
legalized: true
197+
regBankSelected: true
198+
tracksRegLiveness: true
199+
200+
body: |
201+
bb.0:
202+
liveins: $sgpr0
203+
204+
; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_undef_s16
205+
; GFX9: liveins: $sgpr0
206+
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
207+
; GFX9: S_ENDPGM 0, implicit [[COPY]]
208+
%0:sgpr(s32) = COPY $sgpr0
209+
210+
%1:sgpr(s16) = G_TRUNC %0
211+
%2:sgpr(s16) = G_IMPLICIT_DEF
212+
213+
%3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
214+
S_ENDPGM 0, implicit %3
215+
...
216+
217+
---
218+
name: test_build_vector_s_v2s16_s_undef_s16_s_s16
219+
legalized: true
220+
regBankSelected: true
221+
tracksRegLiveness: true
222+
223+
body: |
224+
bb.0:
225+
liveins: $sgpr0
226+
227+
; GFX9-LABEL: name: test_build_vector_s_v2s16_s_undef_s16_s_s16
228+
; GFX9: liveins: $sgpr0
229+
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
230+
; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
231+
; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
232+
%0:sgpr(s32) = COPY $sgpr0
233+
234+
%1:sgpr(s16) = G_IMPLICIT_DEF
235+
%2:sgpr(s16) = G_TRUNC %0
236+
237+
%3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
238+
S_ENDPGM 0, implicit %3
239+
...

0 commit comments

Comments
 (0)